1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-29 11:41:21 +03:00

* sysdeps/unix/sysv/linux/x86_64/getcontext.S: Use functionally
equivalent, but shorter instructions.
	* sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
	* sysdeps/unix/sysv/linux/x86_64/setcontext.S: Likewise.
	* sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise.
	* sysdeps/unix/sysv/linux/x86_64/swapcontext.S: Likewise.
	* sysdeps/unix/x86_64/sysdep.S: Likewise.
	* sysdeps/x86_64/strchr.S: Likewise.
	* sysdeps/x86_64/memset.S: Likewise.
	* sysdeps/x86_64/strcspn.S: Likewise.
	* sysdeps/x86_64/strcmp.S: Likewise.
	* sysdeps/x86_64/elf/start.S: Likewise.
	* sysdeps/x86_64/strspn.S: Likewise.
	* sysdeps/x86_64/dl-machine.h: Likewise.
	* sysdeps/x86_64/bsd-_setjmp.S: Likewise.
	* sysdeps/x86_64/bsd-setjmp.S: Likewise.
	* sysdeps/x86_64/strtok.S: Likewise.
This commit is contained in:
Ulrich Drepper
2005-03-31 10:02:53 +00:00
parent 4d6302cf51
commit ee6189855a
174 changed files with 8152 additions and 6793 deletions

View File

@ -1,3 +1,23 @@
2005-03-31 Jakub Jelinek <jakub@redhat.com>
* sysdeps/unix/sysv/linux/x86_64/getcontext.S: Use functionally
equivalent, but shorter instructions.
* sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
* sysdeps/unix/sysv/linux/x86_64/setcontext.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/swapcontext.S: Likewise.
* sysdeps/unix/x86_64/sysdep.S: Likewise.
* sysdeps/x86_64/strchr.S: Likewise.
* sysdeps/x86_64/memset.S: Likewise.
* sysdeps/x86_64/strcspn.S: Likewise.
* sysdeps/x86_64/strcmp.S: Likewise.
* sysdeps/x86_64/elf/start.S: Likewise.
* sysdeps/x86_64/strspn.S: Likewise.
* sysdeps/x86_64/dl-machine.h: Likewise.
* sysdeps/x86_64/bsd-_setjmp.S: Likewise.
* sysdeps/x86_64/bsd-setjmp.S: Likewise.
* sysdeps/x86_64/strtok.S: Likewise.
2005-03-30 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/ia64/fpu/e_acosf.S: Update from Intel libm 2005-03-21.

View File

@ -1,3 +1,8 @@
2005-03-31 Jakub Jelinek <jakub@redhat.com>
* sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h: Use
functionally equivalent, but shorter instructions.
2005-03-28 Daniel Jacobowitz <dan@codesourcery.com>
* sysdeps/mips/tls.h: New file.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
@ -45,7 +45,7 @@
POPARGS_##args \
/* The return value from CENABLE is argument for CDISABLE. */ \
movq %rax, (%rsp); \
movq $SYS_ify (syscall_name), %rax; \
movl $SYS_ify (syscall_name), %eax; \
syscall; \
movq (%rsp), %rdi; \
/* Save %rax since it's the error code from the syscall. */ \

View File

@ -1,3 +1,25 @@
2005-03-31 Jakub Jelinek <jakub@redhat.com>
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S: Use
functionally equivalent, but shorter instructions.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S:
Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_once.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S:
Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/lowlevellock.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/sem_post.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S: Likewise.
2005-03-28 Daniel Jacobowitz <dan@codesourcery.com>
* sysdeps/mips/Makefile: New file.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -48,12 +48,16 @@ __lll_mutex_lock_wait:
xorq %r10, %r10 /* No timeout. */
movl $2, %edx
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
cmpl %edx, %eax /* NB: %edx == 2 */
jne 2f
1: movq $SYS_futex, %rax
1: movl $SYS_futex, %eax
syscall
2: movl %edx, %eax
@ -93,7 +97,7 @@ __lll_mutex_timedlock_wait:
1:
/* Get current time. */
movq %rsp, %rdi
xorq %rsi, %rsi
xorl %esi, %esi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
/* This is a regular function call, all caller-save registers
might be clobbered. */
@ -101,7 +105,7 @@ __lll_mutex_timedlock_wait:
/* Compute relative timeout. */
movq 8(%rsp), %rax
movq $1000, %rdi
movl $1000, %edi
mul %rdi /* Micro seconds to nano seconds. */
movq (%r13), %rdi
movq 8(%r13), %rsi
@ -126,9 +130,13 @@ __lll_mutex_timedlock_wait:
je 8f
movq %rsp, %r10
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
movq %r12, %rdi
movq $SYS_futex, %rax
movl $SYS_futex, %eax
syscall
movq %rax, %rcx
@ -195,9 +203,9 @@ __lll_mutex_unlock_wake:
pushq %rdx
movl $0, (%rdi)
movq $FUTEX_WAKE, %rsi
movl $FUTEX_WAKE, %esi
movl $1, %edx /* Wake one thread. */
movq $SYS_futex, %rax
movl $SYS_futex, %eax
syscall
popq %rdx
@ -222,13 +230,13 @@ __lll_timedwait_tid:
/* Get current time. */
2: movq %rsp, %rdi
xorq %rsi, %rsi
xorl %esi, %esi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
callq *%rax
/* Compute relative timeout. */
movq 8(%rsp), %rax
movq $1000, %rdi
movl $1000, %edi
mul %rdi /* Micro seconds to nano seconds. */
movq (%r13), %rdi
movq 8(%r13), %rsi
@ -248,9 +256,13 @@ __lll_timedwait_tid:
jz 4f
movq %rsp, %r10
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
movq %r12, %rdi
movq $SYS_futex, %rax
movl $SYS_futex, %eax
syscall
cmpl $0, (%rdi)

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -63,9 +63,14 @@ pthread_barrier_wait:
/* Wait for the remaining threads. The call will return immediately
if the CURR_EVENT memory has meanwhile been changed. */
7: xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
7:
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
xorq %r10, %r10
8: movq $SYS_futex, %rax
8: movl $SYS_futex, %eax
syscall
/* Don't return on spurious wakeups. The syscall does not change
@ -110,8 +115,8 @@ pthread_barrier_wait:
/* Wake up all waiters. The count is a signed number in the kernel
so 0x7fffffff is the highest value. */
movl $0x7fffffff, %edx
movq $FUTEX_WAKE, %rsi
movq $SYS_futex, %rax
movl $FUTEX_WAKE, %esi
movl $SYS_futex, %eax
syscall
/* Increment LEFT. If this brings the count back to the

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -81,10 +81,10 @@ __pthread_cond_broadcast:
je 9f
/* Wake up all threads. */
movq $FUTEX_CMP_REQUEUE, %rsi
movq $SYS_futex, %rax
movl $FUTEX_CMP_REQUEUE, %esi
movl $SYS_futex, %eax
movl $1, %edx
movq $0x7fffffff, %r10
movl $0x7fffffff, %r10d
syscall
/* For any kind of error, which mainly is EAGAIN, we try again
@ -128,9 +128,9 @@ __pthread_cond_broadcast:
jmp 8b
9: /* The futex requeue functionality is not available. */
movq $0x7fffffff, %rdx
movq $FUTEX_WAKE, %rsi
movq $SYS_futex, %rax
movl $0x7fffffff, %edx
movl $FUTEX_WAKE, %esi
movl $SYS_futex, %eax
syscall
jmp 10b
.size __pthread_cond_broadcast, .-__pthread_cond_broadcast

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -66,9 +66,9 @@ __pthread_cond_signal:
addl $1, (%rdi)
/* Wake up one thread. */
movq $FUTEX_WAKE, %rsi
movq $SYS_futex, %rax
movq $1, %rdx
movl $FUTEX_WAKE, %esi
movl $SYS_futex, %eax
movl $1, %edx
syscall
/* Unlock. */

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -56,7 +56,7 @@ __pthread_cond_timedwait:
.Lsubq:
cmpq $1000000000, 8(%rdx)
movq $EINVAL, %rax
movl $EINVAL, %eax
jae 18f
/* Stack frame:
@ -102,7 +102,7 @@ __pthread_cond_timedwait:
/* Unlock the mutex. */
2: movq 16(%rsp), %rdi
xorq %rsi, %rsi
xorl %esi, %esi
callq __pthread_mutex_unlock_usercnt
testl %eax, %eax
@ -141,7 +141,7 @@ __pthread_cond_timedwait:
/* Only clocks 0 and 1 are allowed so far. Both are handled in the
kernel. */
leaq 24(%rsp), %rsi
movq $__NR_clock_gettime, %rax
movl $__NR_clock_gettime, %eax
syscall
# ifndef __ASSUME_POSIX_TIMERS
cmpq $-ENOSYS, %rax
@ -155,13 +155,13 @@ __pthread_cond_timedwait:
subq 32(%rsp), %rdx
#else
leaq 24(%rsp), %rdi
xorq %rsi, %rsi
xorl %esi, %esi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
callq *%rax
/* Compute relative timeout. */
movq 32(%rsp), %rax
movq $1000, %rdx
movl $1000, %edx
mul %rdx /* Micro seconds to nano seconds. */
movq (%r13), %rcx
movq 8(%r13), %rdx
@ -195,10 +195,14 @@ __pthread_cond_timedwait:
movl %eax, (%rsp)
leaq 24(%rsp), %r10
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
movq %r12, %rdx
addq $cond_futex, %rdi
movq $SYS_futex, %rax
movl $SYS_futex, %eax
syscall
movq %rax, %r14
@ -237,7 +241,7 @@ __pthread_cond_timedwait:
13: incq wakeup_seq(%rdi)
incl cond_futex(%rdi)
movq $ETIMEDOUT, %r14
movl $ETIMEDOUT, %r14d
jmp 14f
23: xorq %r14, %r14
@ -256,8 +260,8 @@ __pthread_cond_timedwait:
jne 25f
addq $cond_nwaiters, %rdi
movq $SYS_futex, %rax
movq $FUTEX_WAKE, %rsi
movl $SYS_futex, %eax
movl $FUTEX_WAKE, %esi
movl $1, %edx
syscall
subq $cond_nwaiters, %rdi
@ -349,13 +353,13 @@ __pthread_cond_timedwait:
#if defined __NR_clock_gettime && !defined __ASSUME_POSIX_TIMERS
/* clock_gettime not available. */
19: leaq 24(%rsp), %rdi
xorq %rsi, %rsi
xorl %esi, %esi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
callq *%rax
/* Compute relative timeout. */
movq 32(%rsp), %rax
movq $1000, %rdx
movl $1000, %edx
mul %rdx /* Micro seconds to nano seconds. */
movq (%r13), %rcx
movq 8(%r13), %rdx

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -82,12 +82,12 @@ __condvar_cleanup:
jne 4f
addq $cond_nwaiters, %rdi
movq $SYS_futex, %rax
movq $FUTEX_WAKE, %rsi
movl $SYS_futex, %eax
movl $FUTEX_WAKE, %esi
movl $1, %edx
syscall
subq $cond_nwaiters, %rdi
movq $1, %r12
movl $1, %r12d
4: LOCK
#if cond_lock == 0
@ -105,9 +105,9 @@ __condvar_cleanup:
2: testq %r12, %r12
jnz 5f
addq $cond_futex, %rdi
movq $FUTEX_WAKE, %rsi
movl $FUTEX_WAKE, %esi
movl $0x7fffffff, %edx
movq $SYS_futex, %rax
movl $SYS_futex, %eax
syscall
5: movq 16(%r8), %rdi
@ -170,7 +170,7 @@ __pthread_cond_wait:
/* Unlock the mutex. */
2: movq 16(%rsp), %rdi
xorq %rsi, %rsi
xorl %esi, %esi
callq __pthread_mutex_unlock_usercnt
testl %eax, %eax
@ -215,8 +215,12 @@ __pthread_cond_wait:
xorq %r10, %r10
movq %r12, %rdx
addq $cond_futex-cond_lock, %rdi
movq $SYS_futex, %rax
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
movl $SYS_futex, %eax
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
syscall
movl (%rsp), %edi
@ -262,8 +266,8 @@ __pthread_cond_wait:
jne 17f
addq $cond_nwaiters, %rdi
movq $SYS_futex, %rax
movq $FUTEX_WAKE, %rsi
movl $SYS_futex, %eax
movl $FUTEX_WAKE, %esi
movl $1, %edx
syscall
subq $cond_nwaiters, %rdi

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -74,8 +74,12 @@ __pthread_once:
jnz 3f /* Different for generation -> run initializer. */
/* Somebody else got here first. Wait. */
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
movq $SYS_futex, %rax
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
movl $SYS_futex, %eax
syscall
jmp 6b
@ -98,12 +102,12 @@ __pthread_once:
/* Wake up all other threads. */
movl $0x7fffffff, %edx
movl $FUTEX_WAKE, %esi
movq $SYS_futex, %rax
movl $SYS_futex, %eax
syscall
4: addq $8, %rsp
.Ladd:
xorq %rax, %rax
xorl %eax, %eax
retq
.size __pthread_once,.-__pthread_once
@ -124,8 +128,8 @@ clear_once_control:
movl $0, (%rdi)
movl $0x7fffffff, %edx
movq $FUTEX_WAKE, %rsi
movq $SYS_futex, %rax
movl $FUTEX_WAKE, %esi
movl $SYS_futex, %eax
syscall
movq %r8, %rdi

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -74,8 +74,12 @@ __pthread_rwlock_rdlock:
jne 10f
11: addq $READERS_WAKEUP, %rdi
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
movq $SYS_futex, %rax
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
movl $SYS_futex, %eax
syscall
subq $READERS_WAKEUP, %rdi
@ -94,7 +98,7 @@ __pthread_rwlock_rdlock:
13: decl READERS_QUEUED(%rdi)
jmp 2b
5: xorq %rdx, %rdx
5: xorl %edx, %edx
incl NR_READERS(%rdi)
je 8f
9: LOCK
@ -122,7 +126,7 @@ __pthread_rwlock_rdlock:
14: cmpl %fs:TID, %eax
jne 3b
/* Deadlock detected. */
movq $EDEADLK, %rdx
movl $EDEADLK, %edx
jmp 9b
6:
@ -137,12 +141,12 @@ __pthread_rwlock_rdlock:
/* Overflow. */
8: decl NR_READERS(%rdi)
movq $EAGAIN, %rdx
movl $EAGAIN, %edx
jmp 9b
/* Overflow. */
4: decl READERS_QUEUED(%rdi)
movq $EAGAIN, %rdx
movl $EAGAIN, %edx
jmp 9b
10:

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -90,13 +90,13 @@ pthread_rwlock_timedrdlock:
/* Get current time. */
11: movq %rsp, %rdi
xorq %rsi, %rsi
xorl %esi, %esi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
callq *%rax
/* Compute relative timeout. */
movq 8(%rsp), %rax
movq $1000, %rdi
movl $1000, %edi
mul %rdi /* Micro seconds to nano seconds. */
movq (%r13), %rcx
movq 8(%r13), %rdi
@ -112,11 +112,15 @@ pthread_rwlock_timedrdlock:
movq %rcx, (%rsp) /* Store relative timeout. */
movq %rdi, 8(%rsp)
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
movq %rsp, %r10
movl %r14d, %edx
leaq READERS_WAKEUP(%r12), %rdi
movq $SYS_futex, %rax
movl $SYS_futex, %eax
syscall
movq %rax, %rdx
17:
@ -136,11 +140,11 @@ pthread_rwlock_timedrdlock:
cmpq $-ETIMEDOUT, %rdx
jne 2b
18: movq $ETIMEDOUT, %rdx
18: movl $ETIMEDOUT, %edx
jmp 9f
5: xorq %rdx, %rdx
5: xorl %edx, %edx
incl NR_READERS(%r12)
je 8f
9: LOCK
@ -168,7 +172,7 @@ pthread_rwlock_timedrdlock:
14: cmpl %fs:TID, %eax
jne 3b
movq $EDEADLK, %rdx
movl $EDEADLK, %edx
jmp 9b
6:
@ -182,12 +186,12 @@ pthread_rwlock_timedrdlock:
/* Overflow. */
8: decl NR_READERS(%r12)
movq $EAGAIN, %rdx
movl $EAGAIN, %edx
jmp 9b
/* Overflow. */
4: decl READERS_QUEUED(%r12)
movq $EAGAIN, %rdx
movl $EAGAIN, %edx
jmp 9b
10:
@ -211,6 +215,6 @@ pthread_rwlock_timedrdlock:
16: movq $-ETIMEDOUT, %rdx
jmp 17b
19: movq $EINVAL, %rdx
19: movl $EINVAL, %edx
jmp 9b
.size pthread_rwlock_timedrdlock,.-pthread_rwlock_timedrdlock

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -86,13 +86,13 @@ pthread_rwlock_timedwrlock:
/* Get current time. */
11: movq %rsp, %rdi
xorq %rsi, %rsi
xorl %esi, %esi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
callq *%rax
/* Compute relative timeout. */
movq 8(%rsp), %rax
movq $1000, %rdi
movl $1000, %edi
mul %rdi /* Micro seconds to nano seconds. */
movq (%r13), %rcx
movq 8(%r13), %rdi
@ -108,11 +108,15 @@ pthread_rwlock_timedwrlock:
movq %rcx, (%rsp) /* Store relative timeout. */
movq %rdi, 8(%rsp)
xorq %rsi, %rsi /* movq $FUTEX_WAIT, %rsi */
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
movq %rsp, %r10
movl %r14d, %edx
leaq WRITERS_WAKEUP(%r12), %rdi
movq $SYS_futex, %rax
movl $SYS_futex, %eax
syscall
movq %rax, %rdx
17:
@ -132,11 +136,11 @@ pthread_rwlock_timedwrlock:
cmpq $-ETIMEDOUT, %rdx
jne 2b
18: movq $ETIMEDOUT, %rdx
18: movl $ETIMEDOUT, %edx
jmp 9f
5: xorq %rdx, %rdx
5: xorl %edx, %edx
movl %fs:TID, %eax
movl %eax, WRITER(%r12)
9: LOCK
@ -164,7 +168,7 @@ pthread_rwlock_timedwrlock:
14: cmpl %fs:TID, %eax
jne 3b
20: movq $EDEADLK, %rdx
20: movl $EDEADLK, %edx
jmp 9b
6:
@ -178,7 +182,7 @@ pthread_rwlock_timedwrlock:
/* Overflow. */
4: decl WRITERS_QUEUED(%r12)
movq $EAGAIN, %rdx
movl $EAGAIN, %edx
jmp 9b
10:
@ -202,6 +206,6 @@ pthread_rwlock_timedwrlock:
16: movq $-ETIMEDOUT, %rdx
jmp 17b
19: movq $EINVAL, %rdx
19: movl $EINVAL, %edx
jmp 9b
.size pthread_rwlock_timedwrlock,.-pthread_rwlock_timedwrlock

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -56,7 +56,7 @@ __pthread_rwlock_unlock:
5: movl $0, WRITER(%rdi)
movq $1, %rsi
movl $1, %esi
leaq WRITERS_WAKEUP(%rdi), %r10
movq %rsi, %rdx
cmpl $0, WRITERS_QUEUED(%rdi)
@ -78,11 +78,11 @@ __pthread_rwlock_unlock:
#endif
jne 7f
8: movq $SYS_futex, %rax
8: movl $SYS_futex, %eax
movq %r10, %rdi
syscall
xorq %rax, %rax
xorl %eax, %eax
retq
.align 16
@ -94,7 +94,7 @@ __pthread_rwlock_unlock:
#endif
jne 3f
4: xorq %rax, %rax
4: xorl %eax, %eax
retq
1:

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -72,8 +72,12 @@ __pthread_rwlock_wrlock:
jne 10f
11: addq $WRITERS_WAKEUP, %rdi
movq %r10, %rsi /* movq $FUTEX_WAIT, %rsi */
movq $SYS_futex, %rax
#if FUTEX_WAIT == 0
xorl %esi, %esi
#else
movl $FUTEX_WAIT, %esi
#endif
movl $SYS_futex, %eax
syscall
subq $WRITERS_WAKEUP, %rdi
@ -92,7 +96,7 @@ __pthread_rwlock_wrlock:
13: decl WRITERS_QUEUED(%rdi)
jmp 2b
5: xorq %rdx, %rdx
5: xorl %edx, %edx
movl %fs:TID, %eax
movl %eax, WRITER(%rdi)
9: LOCK
@ -119,7 +123,7 @@ __pthread_rwlock_wrlock:
14: cmpl %fs:TID, %eax
jne 3b
movq $EDEADLK, %rdx
movl $EDEADLK, %edx
jmp 9b
6:

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -41,8 +41,8 @@ sem_post:
LOCK
xaddl %edx, (%rdi)
movq $SYS_futex, %rax
movq $FUTEX_WAKE, %rsi
movl $SYS_futex, %eax
movl $FUTEX_WAKE, %esi
incl %edx
syscall

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -82,14 +82,14 @@ sem_timedwait:
7: call __pthread_enable_asynccancel
movl %eax, 16(%rsp)
xorq %rsi, %rsi
xorl %esi, %esi
movq %rsp, %rdi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
callq *%rax
/* Compute relative timeout. */
movq 8(%rsp), %rax
movq $1000, %rdi
movl $1000, %edi
mul %rdi /* Micro seconds to nano seconds. */
movq (%r13), %rdi
movq 8(%r13), %rsi
@ -107,8 +107,8 @@ sem_timedwait:
movq %rsp, %r10
movq %r12, %rdi
xorq %rsi, %rsi
movq $SYS_futex, %rax
xorl %esi, %esi
movl $SYS_futex, %eax
xorl %edx, %edx
syscall
movq %rax, %r14

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@ -77,7 +77,7 @@ sem_wait:
movl %eax, %r8d
xorq %r10, %r10
movq $SYS_futex, %rax
movl $SYS_futex, %eax
movq %r13, %rdi
movq %r10, %rsi
movq %r10, %rdx

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
@ -48,7 +48,7 @@
POPARGS_##args \
/* The return value from CENABLE is argument for CDISABLE. */ \
movq %rax, (%rsp); \
movq $SYS_ify (syscall_name), %rax; \
movl $SYS_ify (syscall_name), %eax; \
syscall; \
movq (%rsp), %rdi; \
/* Save %rax since it's the error code from the syscall. */ \

View File

@ -27,7 +27,8 @@ sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \
$(duplicated-routines)
sysdep-CPPFLAGS += -include libm-symbols.h \
-D__POSIX__ \
-D__POSIX__ -Dopensource \
-D_LIB_VERSIONIMF=_LIB_VERSION \
-DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64
-DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64 \
-DSIZE_LONG_64 -DIA64
endif

View File

@ -824,6 +824,7 @@ acos_abs_gt_1:
GLOBAL_LIBM_END(acos)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -601,6 +601,7 @@ ACOSF_ABS_ONE:
GLOBAL_LIBM_END(acosf)
// Stack operations when calling error support.
// (1) (2)
// sp -> + psp -> +

View File

@ -1139,6 +1139,7 @@ ACOSH_LESS_ONE:
GLOBAL_LIBM_END(acosh)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -968,6 +968,7 @@ ACOSH_LESS_ONE:
GLOBAL_LIBM_END(acoshf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -1650,6 +1650,7 @@ acoshl_lt_pone:
GLOBAL_LIBM_END(acoshl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -2482,6 +2482,7 @@ acosl_SPECIAL_CASES:
GLOBAL_LIBM_END(acosl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// (1)

View File

@ -800,6 +800,7 @@ asin_abs_gt_1:
GLOBAL_LIBM_END(asin)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -583,6 +583,7 @@ ASINF_ABS_ONE:
;;
GLOBAL_LIBM_END(asinf)
// Stack operations when calling error support.
// (1) (2)
// sp -> + psp -> +

View File

@ -2459,6 +2459,7 @@ SMALL_S:
GLOBAL_LIBM_END(asinl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// (1)

View File

@ -52,6 +52,7 @@
// 08/20/02 Corrected inexact flag and directed rounding symmetry bugs
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 04/17/03 Added missing mutex directive
// 12/23/03 atan2(NaN1,NaN2) now QNaN1, for consistency with atan2f, atan2l
//
// API
//==============================================================
@ -142,7 +143,7 @@
// -0 -0 -pi
//
// Nan anything quiet Y
// anything NaN quiet X
// Not NaN NaN quiet X
// atan2(+-0/+-0) sets double error tag to 37
@ -388,7 +389,7 @@ GLOBAL_IEEE754_ENTRY(atan2)
}
{ .mfb
ldfe atan2_P21 = [EXP_AD_P2],16
(p10) fma.d.s0 f8 = atan2_Y,atan2_X,f0 // If y=nan, result quietized y
(p10) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If y=nan, result quietized y
(p10) br.ret.spnt b0 // Exit if y=nan
;;
}
@ -985,6 +986,7 @@ ATAN2_ERROR:
}
GLOBAL_IEEE754_END(atan2)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// (1)

View File

@ -827,6 +827,7 @@ ATAN2F_XY_INF_NAN_ZERO:
GLOBAL_IEEE754_END(atan2f)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
mov GR_Parameter_TAG = 38

View File

@ -1008,6 +1008,7 @@ atanh_ge_one:
GLOBAL_LIBM_END(atanh)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -782,6 +782,7 @@ atanhf_ge_one:
GLOBAL_LIBM_END(atanhf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -1101,6 +1101,7 @@ atanhl_gt_one:
};;
GLOBAL_LIBM_END(atanhl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -811,6 +811,7 @@ COSH_UNORM:
GLOBAL_IEEE754_END(cosh)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -652,6 +652,7 @@ COSH_UNORM:
GLOBAL_IEEE754_END(coshf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -1033,6 +1033,7 @@ COSH_HUGE:
GLOBAL_IEEE754_END(coshl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -1,7 +1,7 @@
.file "exp.s"
// Copyright (c) 2000 - 2002, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -52,6 +52,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/07/02 Force inexact flag
// 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path
// 05/30/03 Set inexact flag on unmasked overflow/underflow
// API
//==============================================================
@ -602,7 +603,7 @@ EXP_CERTAIN_OVERFLOW:
}
{ .mfb
mov GR_Parameter_TAG = 14
fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
fma.d.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result
br.cond.sptk __libm_error_region
}
;;
@ -685,6 +686,13 @@ EXP_CERTAIN_UNDERFLOW:
}
;;
{ .mfi
nop.m 0
fmerge.se fTmp = fTmp, fLn2_by_128_lo // Small with signif lsb 1
nop.i 0
}
;;
{ .mfb
nop.m 0
fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
@ -730,6 +738,7 @@ EXP_UNDERFLOW_ZERO:
GLOBAL_IEEE754_END(exp)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -1,7 +1,7 @@
.file "exp10.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -43,6 +43,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/06/02 Improved performance; no inexact flags on exact cases
// 01/29/03 Added missing } to bundle templates
// 12/16/04 Call error handling on underflow.
//
// API
//==============================================================
@ -81,8 +82,8 @@
// Registers used
//==============================================================
// r2-r3, r14-r40
// f6-f15, f32-f51
// p6-p9, p12
// f6-f15, f32-f52
// p6-p12
//
@ -104,6 +105,7 @@ GR_EXPMAX = r24
GR_BIAS53 = r25
GR_ROUNDVAL = r26
GR_SNORM_LIMIT = r26
GR_MASK = r27
GR_KF0 = r28
GR_MASK_low = r29
@ -161,6 +163,7 @@ FR_E = f49
FR_exact_limit = f50
FR_int_x = f51
FR_SNORM_LIMIT = f52
// Data tables
@ -256,8 +259,12 @@ GLOBAL_IEEE754_ENTRY(exp10)
}
;;
{.mib
{.mlx
ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63)
movl GR_SNORM_LIMIT= 0xc0733a7146f72a41 // Smallest normal threshold
}
{.mib
nop.m 0
nop.i 0
(p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero
}
@ -284,7 +291,7 @@ GLOBAL_IEEE754_ENTRY(exp10)
;;
{.mfi
nop.m 0
setf.d FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit
fma.s1 FR_L2_10_high= FR_LOG2_10, FR_2P53, f0 // FR_LOG2_10= log2(10)_hi
nop.i 0
}
@ -388,6 +395,13 @@ GLOBAL_IEEE754_ENTRY(exp10)
}
;;
{.mfi
nop.m 0
fcmp.ge.s1 p11,p0= f8, FR_SNORM_LIMIT // Test x for normal range
nop.i 0
}
;;
{.mfi
nop.m 0
fma.s1 FR_E= FR_E0, FR_COEFF1, f0 // E= C_1*e
@ -431,10 +445,17 @@ GLOBAL_IEEE754_ENTRY(exp10)
{.mfb
nop.m 0
(p9) fma.d.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1
br.ret.sptk b0 // return
(p11) br.ret.sptk b0 // return, if result normal
}
;;
// Here if result in denormal range (and not zero)
{.mib
nop.m 0
mov GR_Parameter_TAG= 265
br.cond.sptk __libm_error_region // Branch to error handling
}
;;
SPECIAL_exp10:
{.mfi
@ -487,53 +508,35 @@ SPECIAL_exp10:
OUT_RANGE_exp10:
// underflow: p6= 1
// overflow: p8= 1
{.mii
.pred.rel "mutex",p6,p8
{.mmi
(p8) mov GR_EXPMAX= 0x1fffe
nop.i 0
nop.i 0
}
;;
{.mmb
(p8) mov GR_Parameter_TAG= 166
(p8) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfi
nop.m 999
(p8) fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow
nop.i 999
}
// underflow: p6= 1
{.mii
nop.m 0
(p6) mov GR_EXPMAX= 1
nop.i 0
}
;;
{.mmb
nop.m 0
(p6) setf.exp FR_R= GR_EXPMAX
nop.b 999
{.mii
setf.exp FR_R= GR_EXPMAX
(p8) mov GR_Parameter_TAG= 166
(p6) mov GR_Parameter_TAG= 265
}
;;
{.mfb
nop.m 999
(p6) fma.d.s0 f8= FR_R, FR_R, f0 // Create underflow
(p6) br.ret.sptk b0 // will not call libm_error for underflow
nop.m 0
fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow
br.cond.sptk __libm_error_region // Branch to error handling
}
;;
GLOBAL_IEEE754_END(exp10)
weak_alias (exp10, pow10)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -1,7 +1,7 @@
.file "exp10f.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -43,6 +43,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/06/02 Improved performance and accuracy; no inexact flags on exact cases
// 01/29/03 Added missing } to bundle templates
// 12/16/04 Call error handling on underflow.
//
// API
//==============================================================
@ -80,8 +81,8 @@
// Registers used
//==============================================================
// r2-r3, r14-r40
// f6-f15, f32-f51
// p6-p9, p12
// f6-f15, f32-f52
// p6-p12
//
@ -102,6 +103,7 @@ GR_Fh_ADDR = r23
GR_EXPMAX = r24
GR_ROUNDVAL = r26
GR_SNORM_LIMIT = r26
GR_MASK = r27
GR_KF0 = r28
GR_MASK_low = r29
@ -153,6 +155,7 @@ FR_E = f49
FR_exact_limit = f50
FR_int_x = f51
FR_SNORM_LIMIT = f52
// Data tables
@ -246,8 +249,12 @@ GLOBAL_IEEE754_ENTRY(exp10f)
}
;;
{.mib
{.mlx
ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63)
movl GR_SNORM_LIMIT= 0xc217b818 // Smallest normal threshold
}
{.mib
nop.m 0
nop.i 0
(p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero
}
@ -261,7 +268,7 @@ GLOBAL_IEEE754_ENTRY(exp10f)
;;
{.mfi
nop.m 0
setf.s FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit
(p8) fcvt.fx.s1 FR_int_x = f8 // Convert x to integer
nop.i 0
}
@ -335,7 +342,7 @@ GLOBAL_IEEE754_ENTRY(exp10f)
{.mfb
ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high}
nop.f 0
fcmp.ge.s1 p11, p0= f8, FR_SNORM_LIMIT // Test x for normal range
(p12) br.cond.spnt OUT_RANGE_exp10
}
;;
@ -390,10 +397,17 @@ GLOBAL_IEEE754_ENTRY(exp10f)
{.mfb
nop.m 0
(p9) fma.s.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1
br.ret.sptk b0 // return
(p11) br.ret.sptk b0 // return, if result normal
}
;;
// Here if result in denormal range (and not zero)
{.mib
nop.m 0
mov GR_Parameter_TAG= 266
br.cond.sptk __libm_error_region // Branch to error handling
}
;;
SPECIAL_exp10:
{.mfi
@ -446,53 +460,35 @@ SPECIAL_exp10:
OUT_RANGE_exp10:
// underflow: p6= 1
// overflow: p8= 1
{.mii
.pred.rel "mutex",p6,p8
{.mmi
(p8) mov GR_EXPMAX= 0x1fffe
nop.i 0
nop.i 0
}
;;
{.mmb
(p8) mov GR_Parameter_TAG= 167
(p8) setf.exp FR_R= GR_EXPMAX
nop.b 999
}
;;
{.mfi
nop.m 999
(p8) fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow
nop.i 999
}
// underflow: p6= 1
{.mii
nop.m 0
(p6) mov GR_EXPMAX= 1
nop.i 0
}
;;
{.mmb
nop.m 0
(p6) setf.exp FR_R= GR_EXPMAX
nop.b 999
{.mii
setf.exp FR_R= GR_EXPMAX
(p8) mov GR_Parameter_TAG= 167
(p6) mov GR_Parameter_TAG= 266
}
;;
{.mfb
nop.m 999
(p6) fma.s.s0 f8= FR_R, FR_R, f0 // Create underflow
(p6) br.ret.sptk b0 // will not call libm_error for underflow
nop.m 0
fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow
br.cond.sptk __libm_error_region // Branch to error handling
}
;;
GLOBAL_IEEE754_END(exp10f)
weak_alias (exp10f, pow10f)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -1,7 +1,7 @@
.file "exp10l.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -44,6 +44,7 @@
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 05/08/03 Reformatted assembly source; corrected overflow result for round to
// -inf and round to zero; exact results now don't set inexact flag
// 12/16/04 Call error handling on underflow.
//
// API
//==============================================================
@ -79,9 +80,9 @@
// Registers used
//==============================================================
// f6-f15, f32-f62
// f6-f15, f32-f63
// r14-r30, r32-r40
// p6-p8, p12-p14
// p6-p8, p11-p14
//
@ -129,6 +130,7 @@
FR_4 = f60
FR_28 = f61
FR_32 = f62
FR_SNORM_LIMIT = f63
GR_ADDR0 = r14
@ -178,6 +180,7 @@ LOCAL_OBJECT_START(poly_coeffs)
data8 0x3f55d87fe78a6731 // C_5
data8 0x3f2430912f86c787 // C_6
data8 0x9257edfe9b5fb698, 0x00003fbf // log2(10)_low (bits 64...127)
data8 0x9a1bc98027a81918, 0x0000c00b // Smallest normal threshold
LOCAL_OBJECT_END(poly_coeffs)
@ -435,7 +438,7 @@ GLOBAL_IEEE754_ENTRY(exp10l)
{.mmf
// GR_D_ADDR = pointer to D table
add GR_D_ADDR = 2048-64+96+16, GR_ADDR0
add GR_D_ADDR = 2048-64+96+32, GR_ADDR0
// load C_3, C_4
ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR0 ], 16
// y = x*log2(10)*2^8
@ -471,7 +474,8 @@ GLOBAL_IEEE754_ENTRY(exp10l)
}
{.mfi
nop.m 0
// load smallest normal limit
ldfe FR_SNORM_LIMIT = [ GR_ADDR0 ], 16
// x>overflow threshold ?
fcmp.gt.s1 p12, p7 = f8, FR_OF_TEST
nop.i 0 ;;
@ -596,6 +600,13 @@ GLOBAL_IEEE754_ENTRY(exp10l)
nop.i 0 ;;
}
{.mfi
nop.m 0
// test if x >= smallest normal limit
fcmp.ge.s1 p11, p0 = f8, FR_SNORM_LIMIT
nop.i 0 ;;
}
{.mfi
nop.m 0
// P36 = P34+r2*P56
@ -646,9 +657,16 @@ GLOBAL_IEEE754_ENTRY(exp10l)
// result = T+T*P
(p14) fma.s0 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST
// return
br.ret.sptk b0 ;;
(p11) br.ret.sptk b0 ;; // return, if result normal
}
// Here if result in denormal range (and not zero)
{.mib
nop.m 0
mov GR_Parameter_TAG= 264
br.cond.sptk __libm_error_region // Branch to error handling
}
;;
SPECIAL_EXP10:
@ -703,47 +721,35 @@ SPECIAL_EXP10:
OUT_RANGE_EXP10:
{.mii
// overflow: p8 = 1
(p8) mov GR_CONST1 = 0x1fffe
nop.i 0
nop.i 0 ;;
}
{.mmb
(p8) mov GR_Parameter_TAG = 165
(p8) setf.exp FR_KF0 = GR_CONST1
nop.b 999 ;;
}
{.mfi
nop.m 999
(p8) fma.s0 f8 = FR_KF0, FR_KF0, f0
nop.i 999
}
{.mii
nop.m 0
// underflow: p6 = 1
(p6) mov GR_CONST1 = 1
nop.i 0 ;;
}
// overflow: p8 = 1
{.mmb
nop.m 0
(p6) setf.exp FR_KF0 = GR_CONST1
nop.b 999 ;;
.pred.rel "mutex",p6,p8
{.mmi
(p8) mov GR_CONST1 = 0x1fffe
(p6) mov GR_CONST1 = 1
nop.i 0
}
;;
{.mii
setf.exp FR_KF0 = GR_CONST1
(p8) mov GR_Parameter_TAG = 165
(p6) mov GR_Parameter_TAG = 264
}
;;
{.mfb
nop.m 999
(p6) fma.s0 f8 = FR_KF0, FR_KF0, f0
// will not call libm_error for underflow
(p6) br.ret.sptk b0 ;;
fma.s0 f8 = FR_KF0, FR_KF0, f0 // Create overflow/underflow
br.cond.sptk __libm_error_region // Branch to error handling
}
;;
GLOBAL_IEEE754_END(exp10l)
weak_alias (exp10l, pow10l)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi

View File

@ -495,6 +495,7 @@ OUT_RANGE_exp2:
GLOBAL_LIBM_END(exp2)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -470,6 +470,7 @@ OUT_RANGE_exp2:
GLOBAL_LIBM_END(exp2f)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -747,6 +747,7 @@ OUT_RANGE_exp2l:
GLOBAL_LIBM_END(exp2l)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi

View File

@ -1,7 +1,7 @@
.file "expf.s"
// Copyright (c) 2000 - 2002, Intel Corporation
// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -52,6 +52,7 @@
// 09/26/02 support of higher precision inputs added, underflow threshold
// corrected
// 11/15/02 Improved performance on Itanium 2, added possible over/under paths
// 05/30/03 Set inexact flag on unmasked overflow/underflow
//
//
// API
@ -521,7 +522,7 @@ EXP_CERTAIN_OVERFLOW:
}
{ .mfb
mov GR_Parameter_TAG = 16
fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
fma.s.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result
br.cond.sptk __libm_error_region
}
;;
@ -604,6 +605,13 @@ EXP_CERTAIN_UNDERFLOW:
}
;;
{ .mfi
nop.m 0
fmerge.se fTmp = fTmp, f64DivLn2 // Small with non-trial signif
nop.i 0
}
;;
{ .mfb
nop.m 0
fma.s.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
@ -649,6 +657,7 @@ EXP_UNDERFLOW_ZERO:
GLOBAL_IEEE754_END(expf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -499,6 +499,7 @@ FMOD_Y_ZERO:
}
GLOBAL_IEEE754_END(fmod)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -514,6 +514,7 @@ EXP_ERROR_RETURN:
}
GLOBAL_IEEE754_END(fmodf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -1,7 +1,7 @@
.file "fmodl.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -49,6 +49,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header:.section,.global,.proc,.align
// 04/28/03 Fix: fmod(sNaN, 0) no longer sets errno
// 11/23/04 Reformatted routine and improved speed
//
// API
//====================================================================
@ -75,9 +76,16 @@
//
// Registers used
//====================================================================
// Predicate registers: p6-p11
// General registers: r2,r29,r32 (ar.pfs), r33-r39
// Floating point registers: f6-f15
GR_SMALLBIASEXP = r2
GR_2P32 = r3
GR_SMALLBIASEXP = r20
GR_ROUNDCONST = r21
GR_SIG_B = r22
GR_ARPFS = r23
GR_TMP1 = r24
GR_TMP2 = r25
GR_TMP3 = r26
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
@ -93,6 +101,30 @@ FR_X = f10
FR_Y = f9
FR_RESULT = f8
FR_ABS_A = f6
FR_ABS_B = f7
FR_Y_INV = f10
FR_SMALLBIAS = f11
FR_E0 = f12
FR_Q = f13
FR_E1 = f14
FR_2P32 = f15
FR_TMPX = f32
FR_TMPY = f33
FR_ROUNDCONST = f34
FR_QINT = f35
FR_QRND24 = f36
FR_NORM_B = f37
FR_TMP = f38
FR_TMP2 = f39
FR_DFLAG = f40
FR_Y_INV0 = f41
FR_Y_INV1 = f42
FR_Q0 = f43
FR_Q1 = f44
FR_QINT_Z = f45
FR_QREM = f46
FR_B_SGN_A = f47
.section .text
GLOBAL_IEEE754_ENTRY(fmodl)
@ -101,291 +133,306 @@ GLOBAL_IEEE754_ENTRY(fmodl)
// result in f8
{ .mfi
alloc r32=ar.pfs,1,4,4,0
// f6=|a|
fmerge.s f6=f0,f8
mov r2 = 0x0ffdd
getf.sig GR_SIG_B = f9
// FR_ABS_A = |a|
fmerge.s FR_ABS_A = f0, f8
mov GR_SMALLBIASEXP = 0x0ffdd
}
{ .mfi
getf.sig r29=f9
// f7=|b|
fmerge.s f7=f0,f9
nop.i 0;;
nop.m 0
// FR_ABS_B = |b|
fmerge.s FR_ABS_B = f0, f9
nop.i 0
}
;;
{ .mfi
setf.exp f11 = r2
setf.exp FR_SMALLBIAS = GR_SMALLBIASEXP
// (1) y0
frcpa.s1 f10,p6=f6,f7
nop.i 0;;
frcpa.s1 FR_Y_INV0, p6 = FR_ABS_A, FR_ABS_B
nop.i 0
}
;;
{ .mlx
nop.m 0
movl GR_ROUNDCONST = 0x33a00000
}
;;
// eliminate special cases
{ .mmi
nop.m 0
nop.m 0
// y pseudo-zero ?
cmp.eq p7,p10=r29,r0;;
cmp.eq p7, p10 = GR_SIG_B, r0
}
;;
// Y +-NAN, +-inf, +-0? p7
// set p7 if b +/-NAN, +/-inf, +/-0
{ .mfi
nop.m 999
nop.m 0
(p10) fclass.m p7, p10 = f9, 0xe7
nop.i 999;;
}
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X +-NAN, +-inf, ? p9
{ .mfi
nop.m 999
fclass.m.unc p9,p11 = f8, 0xe3
nop.i 999
}
// |x| < |y|? Return x p8
{ .mfi
nop.m 999
(p10) fcmp.lt.unc.s1 p8,p0 = f6,f7
nop.i 999 ;;
}
{ .mfi
mov r2=0x1001f
// (2) q0=a*y0
(p6) fma.s1 f13=f6,f10,f0
nop.i 0
} { .mfi
}
;;
{ .mfi
mov GR_2P32 = 0x1001f
// (2) q0 = a*y0
(p6) fma.s1 FR_Q0 = FR_ABS_A, FR_Y_INV0, f0
nop.i 0
}
{ .mfi
nop.m 0
// (3) e0 = 1 - b * y0
(p6) fnma.s1 f12=f7,f10,f1
nop.i 0;;
(p6) fnma.s1 FR_E0 = FR_ABS_B, FR_Y_INV0, f1
nop.i 0
}
;;
// Y +-NAN, +-inf, +-0? p7
// set p9 if a +/-NAN, +/-inf
{ .mfi
nop.m 999
nop.m 0
fclass.m.unc p9, p11 = f8, 0xe3
nop.i 0
}
// |a| < |b|? Return a, p8=1
{ .mfi
nop.m 0
(p10) fcmp.lt.unc.s1 p8, p0 = FR_ABS_A, FR_ABS_B
nop.i 0
}
;;
// set p7 if b +/-NAN, +/-inf, +/-0
{ .mfi
nop.m 0
// pseudo-NaN ?
(p10) fclass.nm p7, p0 = f9, 0xff
nop.i 999
nop.i 0
}
;;
// qnan snan inf norm unorm 0 -+
// 1 1 1 0 0 0 11
// e 3
// X +-NAN, +-inf, ? p9
// set p9 if a is +/-NaN, +/-Inf
{ .mfi
nop.m 999
nop.m 0
(p11) fclass.nm p9, p0 = f8, 0xff
nop.i 999;;
}
{ .mfi
nop.m 0
// y denormal ? set D flag (if |x|<|y|)
(p8) fnma.s0 f10=f9,f1,f9
nop.i 0;;
}
{.mfi
nop.m 0
// normalize x (if |x|<|y|)
(p8) fma.s0 f8=f8,f1,f0
nop.i 0
}
{.bbb
(p9) br.cond.spnt FMOD_X_NAN_INF
(p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
// if |x|<|y|, return
(p8) br.ret.spnt b0;;
}
{.mfi
nop.m 0
// x denormal ? set D flag
fnma.s0 f32=f6,f1,f6
nop.i 0
}
{ .mfi
nop.m 0
// y denormal ? set D flag
fnma.s0 f33=f7,f1,f7
nop.i 0;;
// b denormal ? set D flag (if |a|<|b|)
(p8) fnma.s0 FR_DFLAG = f9, f1, f9
nop.i 0
}
;;
{ .mfi
// f15=2^32
setf.exp f15=r2
// FR_2P32 = 2^32
setf.exp FR_2P32 = GR_2P32
// (4) q1 = q0+e0*q0
(p6) fma.s1 f13=f12,f13,f13
(p6) fma.s1 FR_Q1 = FR_E0, FR_Q0, FR_Q0
nop.i 0
}
{ .mfi
nop.m 0
// (5) e1 = e0 * e0 + 2^-34
(p6) fma.s1 f14=f12,f12,f11
nop.i 0;;
(p6) fma.s1 FR_E1 = FR_E0, FR_E0, FR_SMALLBIAS
nop.i 0
}
{.mlx
;;
{ .mfi
nop.m 0
movl r2=0x33a00000;;
// normalize a (if |a|<|b|)
(p8) fma.s0 f8 = f8, f1, f0
nop.i 0
}
{ .bbb
(p9) br.cond.spnt FMOD_A_NAN_INF
(p7) br.cond.spnt FMOD_B_NAN_INF_ZERO
// if |a|<|b|, return
(p8) br.ret.spnt b0
}
;;
{ .mfi
nop.m 0
// (6) y1 = y0 + e0 * y0
(p6) fma.s1 f10=f12,f10,f10
nop.i 0;;
}
{.mfi
// set f12=1.25*2^{-24}
setf.s f12=r2
// (7) q2=q1+e1*q1
(p6) fma.s1 f13=f13,f14,f13
nop.i 0;;
(p6) fma.s1 FR_Y_INV1 = FR_E0, FR_Y_INV0, FR_Y_INV0
nop.i 0
}
;;
{ .mfi
nop.m 0
fmerge.s f9=f8,f9
// a denormal ? set D flag
// b denormal ? set D flag
fcmp.eq.s0 p12,p0 = FR_ABS_A, FR_ABS_B
nop.i 0
}
{ .mfi
// set FR_ROUNDCONST = 1.25*2^{-24}
setf.s FR_ROUNDCONST = GR_ROUNDCONST
// (7) q2 = q1+e1*q1
(p6) fma.s1 FR_Q = FR_Q1, FR_E1, FR_Q1
nop.i 0
}
;;
{ .mfi
nop.m 0
fmerge.s FR_B_SGN_A = f8, f9
nop.i 0
}
{ .mfi
nop.m 0
// (8) y2 = y1 + e1 * y1
(p6) fma.s1 f10=f14,f10,f10
(p6) fma.s1 FR_Y_INV = FR_E1, FR_Y_INV1, FR_Y_INV1
// set p6 = 0, p10 = 0
cmp.ne.and p6,p10=r0,r0;;
cmp.ne.and p6, p10 = r0, r0
}
;;
// will compute integer quotient bits (24 bits per iteration)
.align 32
loop64:
{ .mfi
nop.m 0
// compare q2, 2^32
fcmp.lt.unc.s1 p8,p7=f13,f15
fcmp.lt.unc.s1 p8, p7 = FR_Q, FR_2P32
nop.i 0
}
{ .mfi
nop.m 0
// will truncate quotient to integer, if exponent<32 (in advance)
fcvt.fx.trunc.s1 f11=f13
nop.i 0;;
fcvt.fx.trunc.s1 FR_QINT = FR_Q
nop.i 0
}
{.mfi
nop.m 0
// if exponent>32, round quotient to single precision (perform in advance)
fma.s.s1 f13=f13,f1,f0
nop.i 0;;
}
;;
{ .mfi
nop.m 0
// set f12=sgn(a)
(p8) fmerge.s f12=f8,f1
// if exponent>32 round quotient to single precision (perform in advance)
fma.s.s1 FR_QRND24 = FR_Q, f1, f0
nop.i 0
}
;;
{ .mfi
nop.m 0
// set FR_ROUNDCONST = sgn(a)
(p8) fmerge.s FR_ROUNDCONST = f8, f1
nop.i 0
}
{ .mfi
nop.m 0
// normalize truncated quotient
(p8) fcvt.xf f13=f11
nop.i 0;;
(p8) fcvt.xf FR_QRND24 = FR_QINT
nop.i 0
}
;;
{ .mfi
nop.m 0
// calculate remainder (assuming f13=RZ(Q))
(p7) fnma.s1 f14=f13,f7,f6
// calculate remainder (assuming FR_QRND24 = RZ(Q))
(p7) fnma.s1 FR_E1 = FR_QRND24, FR_ABS_B, FR_ABS_A
nop.i 0
}
{ .mfi
nop.m 0
// also if exponent>32, round quotient to single precision
// and subtract 1 ulp: q = q-q*(1.25*2^{-24})
(p7) fnma.s.s1 f11=f13,f12,f13
nop.i 0;;
(p7) fnma.s.s1 FR_QINT_Z = FR_QRND24, FR_ROUNDCONST, FR_QRND24
nop.i 0
}
;;
{ .mfi
nop.m 0
// (p8) calculate remainder (82-bit format)
(p8) fnma.s1 f11=f13,f7,f6
(p8) fnma.s1 FR_QREM = FR_QRND24, FR_ABS_B, FR_ABS_A
nop.i 0
}
{ .mfi
nop.m 0
// (p7) calculate remainder (assuming f11=RZ(Q))
(p7) fnma.s1 f6=f11,f7,f6
nop.i 0;;
// (p7) calculate remainder (assuming FR_QINT_Z = RZ(Q))
(p7) fnma.s1 FR_ABS_A = FR_QINT_Z, FR_ABS_B, FR_ABS_A
nop.i 0
}
;;
{ .mfi
nop.m 0
// Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ?
(p8) fcmp.lt.unc.s1 p6,p10=f11,f0
nop.i 0;;
// Final iteration (p8): is FR_ABS_A the correct remainder
// (quotient was not overestimated) ?
(p8) fcmp.lt.unc.s1 p6, p10 = FR_QREM, f0
nop.i 0
}
;;
{ .mfi
nop.m 0
// get new quotient estimation: a'*y2
(p7) fma.s1 f13=f14,f10,f0
(p7) fma.s1 FR_Q = FR_E1, FR_Y_INV, f0
nop.i 0
}
{ .mfb
nop.m 0
// was f13=RZ(Q) ? (then new remainder f14>=0)
(p7) fcmp.lt.unc.s1 p7,p9=f14,f0
nop.b 0;;
// was FR_Q = RZ(Q) ? (then new remainder FR_E1 >= 0)
(p7) fcmp.lt.unc.s1 p7, p9 = FR_E1, f0
nop.b 0
}
;;
.pred.rel "mutex", p6, p10
{ .mfb
nop.m 0
// add b to estimated remainder (to cover the case when the quotient was overestimated)
// also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
(p6) fma.s0 f8=f11,f12,f9
// add b to estimated remainder (to cover the case when the quotient was
// overestimated)
// also set correct sign by using
// FR_B_SGN_A = |b|*sgn(a), FR_ROUNDCONST = sgn(a)
(p6) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, FR_B_SGN_A
nop.b 0
}
{ .mfb
nop.m 0
// set correct sign of result before returning: f12=sgn(a)
(p10) fma.s0 f8=f11,f12,f0
(p8) br.ret.sptk b0;;
// set correct sign of result before returning: FR_ROUNDCONST = sgn(a)
(p10) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, f0
(p8) br.ret.sptk b0
}
;;
{ .mfi
nop.m 0
// if f13 != RZ(Q), get alternative quotient estimation: a''*y2
(p7) fma.s1 f13=f6,f10,f0
(p7) fma.s1 FR_Q = FR_ABS_A, FR_Y_INV, f0
nop.i 0
}
{ .mfb
nop.m 0
// if f14 was RZ(Q), set remainder to f14
(p9) mov f6=f14
br.cond.sptk loop64;;
// if FR_E1 was RZ(Q), set remainder to FR_E1
(p9) fma.s1 FR_ABS_A = FR_E1, f1, f0
br.cond.sptk loop64
}
;;
FMOD_A_NAN_INF:
FMOD_X_NAN_INF:
// Y zero ?
// b zero ?
{ .mfi
nop.m 0
fclass.m p10,p0=f8,0xc3 // Test x=nan
fclass.m p10, p0 = f8, 0xc3 // Test a = nan
nop.i 0
}
{ .mfi
nop.m 0
fma.s1 f10=f9,f1,f0
nop.i 0;;
fma.s1 FR_NORM_B = f9, f1, f0
nop.i 0
}
;;
{ .mfi
nop.m 0
@ -395,97 +442,116 @@ FMOD_X_NAN_INF:
{ .mfi
nop.m 0
(p10) fclass.m p10, p0 = f9, 0x07 // Test x = nan, and y = zero
nop.i 0;;
nop.i 0
}
;;
{ .mfb
nop.m 0
fcmp.eq.unc.s1 p11,p0=f10,f0
(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
fcmp.eq.unc.s1 p11, p0 = FR_NORM_B, f0
(p10) br.ret.spnt b0 // Exit with result = a if a = nan and b = zero
}
;;
{ .mib
nop.m 0
nop.i 0
// if Y zero
(p11) br.cond.spnt FMOD_Y_ZERO;;
(p11) br.cond.spnt FMOD_B_ZERO
}
;;
// X infinity? Return QNAN indefinite
// a= infinity? Return QNAN indefinite
{ .mfi
// set p7 to 0
cmp.ne p7, p0 = r0, r0
fclass.m.unc p8, p9 = f8, 0x23
nop.i 999;;
nop.i 0
}
// Y NaN ?
;;
// b NaN ?
{ .mfi
nop.m 999
nop.m 0
(p8) fclass.m p9, p8 = f9, 0xc3
nop.i 0;;
nop.i 0
}
// Y not pseudo-zero ? (r29 holds significand)
;;
// b not pseudo-zero ? (GR_SIG_B holds significand)
{ .mii
nop.m 999
(p8) cmp.ne p7,p0=r29,r0
nop.i 0;;
nop.m 0
(p8) cmp.ne p7, p0 = GR_SIG_B, r0
nop.i 0
}
;;
{ .mfi
nop.m 999
nop.m 0
(p8) frcpa.s0 f8, p0 = f8, f8
nop.i 0
}
{ .mfi
nop.m 999
nop.m 0
// also set Denormal flag if necessary
(p7) fnma.s0 f9 = f9, f1, f9
nop.i 999 ;;
nop.i 0
}
;;
{ .mfb
nop.m 999
nop.m 0
(p8) fma.s0 f8 = f8, f1, f0
nop.b 999 ;;
nop.b 0
}
;;
{ .mfb
nop.m 999
nop.m 0
(p9) frcpa.s0 f8, p7 = f8, f9
br.ret.sptk b0 ;;
br.ret.sptk b0
}
;;
FMOD_Y_NAN_INF_ZERO:
// Y INF
FMOD_B_NAN_INF_ZERO:
// b INF
{ .mfi
nop.m 999
nop.m 0
fclass.m.unc p7, p0 = f9, 0x23
nop.i 999 ;;
nop.i 0
}
;;
{ .mfb
nop.m 999
nop.m 0
(p7) fma.s0 f8 = f8, f1, f0
(p7) br.ret.spnt b0 ;;
(p7) br.ret.spnt b0
}
;;
// Y NAN?
// b NAN?
{ .mfi
nop.m 999
nop.m 0
fclass.m.unc p9, p10 = f9, 0xc3
nop.i 999 ;;
nop.i 0
}
;;
{ .mfi
nop.m 999
nop.m 0
(p10) fclass.nm p9, p0 = f9, 0xff
nop.i 999 ;;
nop.i 0
}
;;
{ .mfb
nop.m 999
nop.m 0
(p9) fma.s0 f8 = f9, f1, f0
(p9) br.ret.spnt b0 ;;
(p9) br.ret.spnt b0
}
;;
FMOD_Y_ZERO:
FMOD_B_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
@ -493,49 +559,54 @@ FMOD_Y_ZERO:
{ .mfi
nop.m 0
// set Invalid
frcpa.s0 f12,p0=f0,f0
frcpa.s0 FR_TMP, p0 = f0, f0
nop.i 0
}
// X NAN?
;;
// a NAN?
{ .mfi
nop.m 999
nop.m 0
fclass.m.unc p9, p10 = f8, 0xc3
nop.i 999 ;;
nop.i 0
}
;;
{ .mfi
nop.m 999
alloc GR_ARPFS = ar.pfs, 1, 4, 4, 0
(p10) fclass.nm p9, p10 = f8, 0xff
nop.i 999 ;;
nop.i 0
}
;;
{ .mfi
nop.m 999
(p9) frcpa.s0 f11,p7=f8,f0
nop.i 0;;
nop.m 0
(p9) frcpa.s0 FR_TMP2, p7 = f8, f0
nop.i 0
}
;;
{ .mfi
nop.m 999
(p10) frcpa.s0 f11,p7 = f9,f9
mov GR_Parameter_TAG = 120 ;;
nop.m 0
(p10) frcpa.s0 FR_TMP2, p7 = f9, f9
mov GR_Parameter_TAG = 120
}
;;
{ .mfi
nop.m 999
fmerge.s f10 = f8, f8
nop.i 999
nop.m 0
fmerge.s FR_X = f8, f8
nop.i 0
}
{ .mfb
nop.m 999
fma.s0 f8=f11,f1,f0
br.sptk __libm_error_region;;
nop.m 0
fma.s0 f8 = FR_TMP2, f1, f0
br.sptk __libm_error_region
}
;;
GLOBAL_IEEE754_END(fmodl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
@ -549,13 +620,17 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add sp = -64, sp // Create new stack
nop.f 0
mov GR_SAVE_GP = gp // Save gp
};;
}
;;
{ .mmi
stfe [ GR_Parameter_Y ] = FR_Y, 16 // Save Parameter 2 on stack
add GR_Parameter_X = 16, sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0 = b0 // Save b0
};;
}
;;
.body
{ .mib
stfe [ GR_Parameter_X ] = FR_X // Store Parameter 1 on stack
@ -566,30 +641,32 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
stfe [ GR_Parameter_Y ] = FR_RESULT // Store Parameter 3 on stack
add GR_Parameter_Y = -16, GR_Parameter_Y
br.call.sptk b0 = __libm_error_support# // Call error handling function
};;
}
;;
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48, sp
};;
}
;;
{ .mmi
ldfe f8 = [ GR_Parameter_RESULT ] // Get return result off stack
.restore sp
add sp = 64, sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
};;
}
;;
{ .mib
mov gp = GR_SAVE_GP // Restore gp
mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
br.ret.sptk b0 // Return
};;
}
;;
LOCAL_LIBM_END(__libm_error_region)
.type __libm_error_support#, @function
.global __libm_error_support#

View File

@ -106,6 +106,7 @@ FR_RESULT = f8
LOCAL_LIBM_ENTRY(cabs)
LOCAL_LIBM_END(cabs)
GLOBAL_IEEE754_ENTRY(hypot)
{.mfi
@ -384,6 +385,7 @@ GLOBAL_IEEE754_ENTRY(hypot)
(p9) br.ret.sptk b0;;
}
GLOBAL_IEEE754_END(hypot)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -106,6 +106,7 @@ FR_RESULT = f8
LOCAL_LIBM_ENTRY(cabsf)
LOCAL_LIBM_END(cabsf)
GLOBAL_IEEE754_ENTRY(hypotf)
{.mfi
alloc r32= ar.pfs,0,4,4,0
@ -337,6 +338,7 @@ GLOBAL_IEEE754_ENTRY(hypotf)
(p9) br.ret.sptk b0;;
}
GLOBAL_IEEE754_END(hypotf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii

View File

@ -105,6 +105,7 @@ FR_RESULT = f8
LOCAL_LIBM_ENTRY(cabsl)
LOCAL_LIBM_END(cabsl)
GLOBAL_IEEE754_ENTRY(hypotl)
{.mfi
alloc r32= ar.pfs,0,4,4,0
@ -421,6 +422,7 @@ GLOBAL_IEEE754_ENTRY(hypotl)
(p9) br.ret.sptk b0;;
}
GLOBAL_IEEE754_END(hypotl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -1,5 +1,6 @@
/* file: lgamma_r.c */
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
@ -20,7 +21,6 @@
// products derived from this software without specific prior written
// permission.
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

View File

@ -1,5 +1,6 @@
/* file: lgammaf_r.c */
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
@ -20,7 +21,6 @@
// products derived from this software without specific prior written
// permission.
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

View File

@ -1,5 +1,6 @@
/* file: lgammal_r.c */
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
@ -20,7 +21,6 @@
// products derived from this software without specific prior written
// permission.
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

View File

@ -1386,6 +1386,7 @@ GLOBAL_IEEE754_ENTRY(log10)
};;
GLOBAL_IEEE754_END(log10)
GLOBAL_IEEE754_ENTRY(log)
{ .mfi
getf.exp GR_Exp = f8 // if x is unorm then must recompute
@ -1667,6 +1668,7 @@ log_libm_err:
};;
GLOBAL_IEEE754_END(log)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -655,6 +655,7 @@ SPECIAL_LOG2:
GLOBAL_LIBM_END(log2)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -493,6 +493,7 @@ SPECIAL_log2f:
GLOBAL_LIBM_END(log2f)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -761,6 +761,7 @@ LOG2_PSEUDO_ZERO:
GLOBAL_IEEE754_END(log2l)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -841,6 +841,7 @@ GLOBAL_IEEE754_ENTRY(log10f)
br.cond.sptk logf_log10f_common
};;
GLOBAL_IEEE754_END(log10f)
GLOBAL_IEEE754_ENTRY(logf)
{ .mfi
getf.exp GR_Exp = f8 // if x is unorm then must recompute
@ -1087,6 +1088,7 @@ logf_libm_err:
};;
GLOBAL_IEEE754_END(logf)
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
// sp -> + psp -> + psp -> + sp -> +

View File

@ -634,6 +634,7 @@ GLOBAL_IEEE754_ENTRY(logl)
GLOBAL_IEEE754_END(logl)
GLOBAL_IEEE754_ENTRY(log10l)
{ .mfi
alloc r32 = ar.pfs,0,21,4,0
@ -1144,6 +1145,7 @@ LOGL_64_negative:
GLOBAL_IEEE754_END(log10l)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -2234,6 +2234,7 @@ POW_OVER_UNDER_ERROR:
GLOBAL_LIBM_END(pow)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

File diff suppressed because it is too large Load Diff

View File

@ -60,6 +60,7 @@
// 02/10/03 Reordered header: .section, .global, .proc, .align;
// used data8 for long double table values
// 04/17/03 Added missing mutex directive
// 10/13/03 Corrected .endp names to match .proc names
//
//*********************************************************************
//
@ -2755,6 +2756,7 @@ POWL_64_SQRT:
GLOBAL_LIBM_END(powl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
@ -2803,6 +2805,6 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
br.ret.sptk b0 // Return
};;
.endp
LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#

View File

@ -531,6 +531,7 @@ EXP_ERROR_RETURN:
GLOBAL_IEEE754_END(remainder)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -550,6 +550,7 @@ EXP_ERROR_RETURN:
GLOBAL_IEEE754_END(remainderf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -557,6 +557,7 @@ EXP_ERROR_RETURN:
}
GLOBAL_IEEE754_END(remainderl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -43,6 +43,7 @@
// 01/26/01 Scalb completely reworked and now standalone version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 08/06/03 Improved performance
//
// API
//==============================================================
@ -50,31 +51,52 @@
// input floating point f8 and floating point f9
// output floating point f8
//
// int_type = 0 if int is 32 bits
// int_type = 1 if int is 64 bits
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
// Strategy:
// Compute biased exponent of result exp_Result = N + exp_X
// Break into ranges:
// exp_Result > 0x103fe -> Certain overflow
// exp_Result = 0x103fe -> Possible overflow
// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path)
// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow
// exp_Result < 0x0fc01 - 52 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
FR_Result2 = f9
FR_Norm_N = f10
FR_Result3 = f11
FR_Norm_X = f12
FR_Result3 = f10
FR_Norm_X = f11
FR_Two_N = f12
FR_N_float_int = f13
FR_Two_N = f14
FR_Two_to_Big = f15
FR_Big = f6
FR_NBig = f7
FR_Norm_N = f14
GR_neg_ov_limit= r14
GR_big_exp = r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
GR_exp_Result = r18
GR_pos_ov_limit= r19
GR_exp_sure_ou = r19
GR_Bias = r20
GR_N_as_int = r21
GR_signexp_X = r22
GR_exp_X = r23
GR_exp_mask = r24
GR_max_exp = r25
GR_min_exp = r26
GR_min_den_exp = r27
GR_Scratch = r28
GR_signexp_N = r29
GR_exp_N = r30
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@ -89,163 +111,156 @@ GLOBAL_IEEE754_ENTRY(scalb)
//
// Is x NAN, INF, ZERO, +-?
//
{ .mfi
alloc r32=ar.pfs,0,3,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch = 0x019C3F,r0
}
//
// Is y a NAN, INF, ZERO, +-?
//
{ .mfi
nop.m 999
fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch1 = 0x063BF,r0
}
;;
//
// Convert N to a fp integer
// Normalize x
//
{ .mfi
nop.m 0
fnorm.s1 FR_Norm_N = FR_Floating_N
nop.i 999
}
{ .mfi
nop.m 999
fnorm.s1 FR_Norm_X = FR_Floating_X
nop.i 999
};;
//
// Create 2*big
// Create 2**-big
// Normalize x
// Branch on special values.
//
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
(p6) br.cond.spnt SCALB_NAN_INF_ZERO
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
(p7) br.cond.spnt SCALB_NAN_INF_ZERO
};;
//
// Convert N to a fp integer
// Create -35000
//
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
addl GR_NBig = -35000,r0
}
;;
//
// Put N if a GP register
// Convert N_float_int to floating point value
// Create 35000
// Build the exponent Bias
//
{ .mii
getf.sig GR_N_as_int = FR_N_float_int
shl GR_Scratch = GR_Scratch,63
addl GR_Big = 35000,r0
{ .mfi
getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
mov GR_Bias = 0x0ffff
}
{ .mfi
addl GR_Bias = 0x0FFFF,r0
fcvt.xf FR_N_float_int = FR_N_float_int
mov GR_Big = 35000 // If N this big then certain overflow
fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
nop.i 0
};;
}
;;
{ .mfi
getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
nop.i 0
}
//
// Normalize n
//
{ .mfi
mov GR_exp_mask = 0x1ffff // Exponent mask
fnorm.s1 FR_Norm_N = FR_Floating_N
nop.i 0
}
;;
//
// Catch those fp values that are beyond 2**64-1
// Is N > 35000
// Is N < -35000
// Is n NAN, INF, ZERO, +-?
//
{ .mfi
cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
nop.f 0
nop.i 0
mov GR_big_exp = 0x1003e // Exponent at which n is integer
fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
mov GR_max_exp = 0x103fe // Exponent of maximum double
}
//
// Normalize x
//
{ .mfb
nop.m 0
fnorm.s1 FR_Norm_X = FR_Floating_X
(p7) br.cond.spnt SCALB_N_UNORM // Branch if n=unorm
}
;;
SCALB_COMMON1:
// Main path continues. Also return here from n=unorm path.
// Handle special cases if x = Nan, Inf, Zero
{ .mfb
nop.m 0
fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
(p6) br.cond.spnt SCALB_NAN_INF_ZERO
}
;;
// Handle special cases if n = Nan, Inf, Zero
{ .mfi
getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
}
{ .mfb
mov GR_min_exp = 0x0fc01 // Exponent of minimum double
fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
(p9) br.cond.spnt SCALB_NAN_INF_ZERO
}
;;
{ .mmi
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
(p7) sub GR_Big = r0, GR_Big // Limit for N
nop.i 0
};;
}
;;
//
// Is N really an int, only for those non-int indefinites?
// Create exp bias.
//
{ .mfi
{ .mib
cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
(p8) br.cond.spnt SCALB_X_UNORM // Branch if x=unorm
}
;;
SCALB_COMMON2:
// Main path continues. Also return here from x=unorm path.
// Create biased exponent for 2**N
{ .mmi
(p6) mov GR_N_as_int = GR_Big // Limit N
;;
add GR_N_Biased = GR_Bias,GR_N_as_int
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
}
;;
{ .mfi
setf.exp FR_Two_N = GR_N_Biased // Form 2**N
(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
}
;;
//
// Branch and return if N is not an int.
// Main path, create 2**N
// Compute biased result exponent
// Branch if N is not an integer
//
{ .mib
add GR_exp_Result = GR_exp_X, GR_N_as_int
mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble
(p9) br.cond.spnt SCALB_N_NOT_INT
}
;;
//
// Raise Denormal operand flag with compare
// Do final operation
//
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.i 999
cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
{ .mfb
nop.m 0
(p7) frcpa.s0 f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
//
// Set denormal on denormal input x and denormal input N
//
{ .mfi
nop.m 999
(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
nop.i 0
};;
{ .mfi
nop.m 999
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
nop.i 999
fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
(p9) br.cond.spnt SCALB_UNDERFLOW // Branch if certain underflow
}
{ .mfi
nop.m 999
fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
nop.i 0
};;
;;
//
// Adjust 2**N if N was very small or very large
//
{ .mib
(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
(p7) br.ret.sptk b0 // Return from main path
}
;;
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
{ .bbb
(p6) br.cond.spnt SCALB_OVERFLOW // Branch if certain overflow
(p8) br.cond.spnt SCALB_POSSIBLE_OVERFLOW // Branch if possible overflow
(p9) br.cond.spnt SCALB_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
{ .mlx
nop.m 999
movl GR_Scratch = 0x00000000000303FF
};;
{ .mfi
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
movl GR_Scratch1= 0x00000000000103FF
};;
;;
// Here if possible underflow.
// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01
SCALB_POSSIBLE_UNDERFLOW:
//
// Here if possible overflow.
// Resulting exponent: 0x103fe = exp_Result
SCALB_POSSIBLE_OVERFLOW:
// Set up necessary status fields
//
@ -254,137 +269,150 @@ GLOBAL_IEEE754_ENTRY(scalb)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow
fsetc.s3 0x7F,0x41
nop.i 999
nop.i 0
}
{ .mfi
nop.m 999
mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow
fsetc.s2 0x7F,0x42
nop.i 999
};;
nop.i 0
}
;;
//
// Do final operation
// Do final operation with s2 and s3
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
setf.exp FR_NBig = GR_neg_ov_limit
fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 0
}
{ .mfi
nop.m 999
fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
setf.exp FR_Big = GR_pos_ov_limit
fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
nop.i 0
}
;;
// Check for overflow or underflow.
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflow)
// S3 user supplied status + FZ + TD (Underflow)
//
//
// Restore s3
// Restore s2
//
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
nop.i 0
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
nop.i 0
}
;;
//
// Is the result zero?
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
nop.i 0
}
{ .mfi
addl GR_Tag = 53, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
}
;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
nop.m 999
nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
nop.i 0
}
;;
//
// Is result bigger than the allowed range?
// Branch out for underflow
//
{ .mfb
(p6) addl GR_Tag = 54, r0
nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt SCALB_UNDERFLOW
};;
}
;;
//
// Branch out for overflow
//
{ .mbb
nop.m 0
{ .bbb
(p7) br.cond.spnt SCALB_OVERFLOW
(p9) br.cond.spnt SCALB_OVERFLOW
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
br.ret.sptk b0 // Return from main path.
}
;;
// Here if result overflows
SCALB_OVERFLOW:
{ .mib
alloc r32=ar.pfs,3,0,4,0
addl GR_Tag = 53, r0 // Set error tag for overflow
br.cond.sptk __libm_error_region // Call error support for overflow
}
;;
// Here if result underflows
SCALB_UNDERFLOW:
{ .mib
alloc r32=ar.pfs,3,0,4,0
addl GR_Tag = 54, r0 // Set error tag for underflow
br.cond.sptk __libm_error_region // Call error support for underflow
}
;;
SCALB_NAN_INF_ZERO:
//
// Convert N to a fp integer
// Before entry, N has been converted to a fp integer in significand of
// FR_N_float_int
//
// Convert N_float_int to floating point value
//
{ .mfi
getf.sig GR_N_as_int = FR_N_float_int
fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
nop.i 0
}
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
nop.i 999
}
{ .mfi
nop.m 0
fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
}
;;
{ .mfi
nop.m 0
fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
shl GR_Scratch = GR_Scratch,63
};;
}
;;
{ .mfi
nop.m 0
fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf
nop.i 0
}
{ .mfi
nop.m 0
fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf
nop.i 0
};;
}
;;
//
// Either X or N is a NaN; return result and possibly raise invalid.
@ -393,12 +421,15 @@ SCALB_NAN_INF_ZERO:
nop.m 0
(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p6) br.ret.spnt b0
};;
}
;;
{ .mfb
getf.sig GR_N_as_int = FR_N_float_int
nop.m 0
(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p7) br.ret.spnt b0
};;
}
;;
//
// If N + Inf do something special
@ -413,43 +444,38 @@ SCALB_NAN_INF_ZERO:
nop.m 0
(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0
nop.i 0
};;
}
;;
//
// If N==-Inf,return x/(-N)
//
{ .mfb
nop.m 0
(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
cmp.ne p7,p0 = GR_N_as_int,GR_Scratch
(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
(p9) br.ret.spnt b0
};;
//
// Convert N_float_int to floating point value
//
{ .mfi
cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
}
;;
//
// Is N an integer.
//
{ .mfi
nop.m 0
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
}
;;
//
// If N not an int, return NaN and raise invalid.
//
{ .mfb
nop.m 0
(p7) frcpa.s0 FR_Result,p6 = f0,f0
(p7) frcpa.s0 FR_Result,p0 = f0,f0
(p7) br.ret.spnt b0
};;
}
;;
//
// Always return x in other path.
@ -458,13 +484,39 @@ SCALB_NAN_INF_ZERO:
nop.m 0
fma.d.s0 FR_Result = FR_Floating_X,f1,f0
br.ret.sptk b0
};;
}
;;
// Here if n not int
// Return NaN and raise invalid.
// The NaN is produced by asking frcpa for 0/0 under the user status
// field s0; the predicate result is discarded by targeting p0.
SCALB_N_NOT_INT:
{ .mfb
nop.m 0
frcpa.s0 FR_Result,p0 = f0,f0
br.ret.sptk b0 // Return to caller with the result in FR_Result
}
;;
// Here if n=unorm
// Re-derives the sign/exponent field and the integer conversion of n from
// the normalized copy FR_Norm_N, then rejoins the main path at
// SCALB_COMMON1.
// NOTE(review): presumably this overrides values the main path took from
// the unnormalized input -- confirm against the entry code.
SCALB_N_UNORM:
{ .mfb
getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
br.cond.sptk SCALB_COMMON1 // Return to main path
}
;;
// Here if x=unorm
// Re-reads the sign/exponent field of x from the normalized copy
// FR_Norm_X, then rejoins the main path at SCALB_COMMON2.
// NOTE(review): presumably this overrides the value the main path took
// from the unnormalized input -- confirm against the entry code.
SCALB_X_UNORM:
{ .mib
getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
nop.i 0
br.cond.sptk SCALB_COMMON2 // Return to main path
}
;;
GLOBAL_IEEE754_END(scalb)
__libm_error_region:
SCALB_OVERFLOW:
SCALB_UNDERFLOW:
LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
@ -517,9 +569,9 @@ SCALB_UNDERFLOW:
// Get location of result on stack
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
nop.m 0
nop.i 0
};;
//

View File

@ -43,6 +43,7 @@
// 01/26/01 Scalb completely reworked and now standalone version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 08/06/03 Improved performance
//
// API
//==============================================================
@ -50,31 +51,52 @@
// input floating point f8 and floating point f9
// output floating point f8
//
// int_type = 0 if int is 32 bits
// int_type = 1 if int is 64 bits
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
// Strategy:
// Compute biased exponent of result exp_Result = N + exp_X
// Break into ranges:
// exp_Result > 0x1007e -> Certain overflow
// exp_Result = 0x1007e -> Possible overflow
// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path)
// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
// exp_Result < 0x0ff81 - 23 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
FR_Result2 = f9
FR_Norm_N = f10
FR_Result3 = f11
FR_Norm_X = f12
FR_Result3 = f10
FR_Norm_X = f11
FR_Two_N = f12
FR_N_float_int = f13
FR_Two_N = f14
FR_Two_to_Big = f15
FR_Big = f6
FR_NBig = f7
FR_Norm_N = f14
GR_neg_ov_limit= r14
GR_big_exp = r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
GR_exp_Result = r18
GR_pos_ov_limit= r19
GR_exp_sure_ou = r19
GR_Bias = r20
GR_N_as_int = r21
GR_signexp_X = r22
GR_exp_X = r23
GR_exp_mask = r24
GR_max_exp = r25
GR_min_exp = r26
GR_min_den_exp = r27
GR_Scratch = r28
GR_signexp_N = r29
GR_exp_N = r30
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@ -89,163 +111,156 @@ GLOBAL_IEEE754_ENTRY(scalbf)
//
// Is x NAN, INF, ZERO, +-?
//
{ .mfi
alloc r32=ar.pfs,0,3,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch = 0x019C3F,r0
}
//
// Is y a NAN, INF, ZERO, +-?
//
{ .mfi
nop.m 999
fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch1 = 0x063BF,r0
}
;;
//
// Convert N to a fp integer
// Normalize x
//
{ .mfi
nop.m 0
fnorm.s1 FR_Norm_N = FR_Floating_N
nop.i 999
}
{ .mfi
nop.m 999
fnorm.s1 FR_Norm_X = FR_Floating_X
nop.i 999
};;
//
// Create 2**big
// Create 2**-big
// Normalize x
// Branch on special values.
//
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
(p6) br.cond.spnt SCALBF_NAN_INF_ZERO
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
(p7) br.cond.spnt SCALBF_NAN_INF_ZERO
};;
//
// Convert N to a fp integer
// Create -35000
//
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
addl GR_NBig = -35000,r0
}
;;
//
// Put N in a GP register
// Convert N_float_int to floating point value
// Create 35000
// Build the exponent Bias
//
{ .mii
getf.sig GR_N_as_int = FR_N_float_int
shl GR_Scratch = GR_Scratch,63
addl GR_Big = 35000,r0
{ .mfi
getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
mov GR_Bias = 0x0ffff
}
{ .mfi
addl GR_Bias = 0x0FFFF,r0
fcvt.xf FR_N_float_int = FR_N_float_int
mov GR_Big = 35000 // If N this big then certain overflow
fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
nop.i 0
};;
}
;;
{ .mfi
getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
nop.i 0
}
//
// Normalize n
//
{ .mfi
mov GR_exp_mask = 0x1ffff // Exponent mask
fnorm.s1 FR_Norm_N = FR_Floating_N
nop.i 0
}
;;
//
// Catch those fp values that are beyond 2**64-1
// Is N > 35000
// Is N < -35000
// Is n NAN, INF, ZERO, +-?
//
{ .mfi
cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
nop.f 0
nop.i 0
mov GR_big_exp = 0x1003e // Exponent at which n is integer
fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
mov GR_max_exp = 0x1007e // Exponent of maximum float
}
//
// Normalize x
//
{ .mfb
nop.m 0
fnorm.s1 FR_Norm_X = FR_Floating_X
(p7) br.cond.spnt SCALBF_N_UNORM // Branch if n=unorm
}
;;
SCALBF_COMMON1:
// Main path continues. Also return here from n=unorm path.
// Handle special cases if x = Nan, Inf, Zero
{ .mfb
nop.m 0
fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
(p6) br.cond.spnt SCALBF_NAN_INF_ZERO
}
;;
// Handle special cases if n = Nan, Inf, Zero
{ .mfi
getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
}
{ .mfb
mov GR_min_exp = 0x0ff81 // Exponent of minimum float
fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
(p9) br.cond.spnt SCALBF_NAN_INF_ZERO
}
;;
{ .mmi
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
(p7) sub GR_Big = r0, GR_Big // Limit for N
nop.i 0
};;
}
;;
//
// Is N really an int, only for those non-int indefinites?
// Create exp bias.
//
{ .mfi
{ .mib
cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
(p8) br.cond.spnt SCALBF_X_UNORM // Branch if x=unorm
}
;;
SCALBF_COMMON2:
// Main path continues. Also return here from x=unorm path.
// Create biased exponent for 2**N
{ .mmi
(p6) mov GR_N_as_int = GR_Big // Limit N
;;
add GR_N_Biased = GR_Bias,GR_N_as_int
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
}
;;
{ .mfi
setf.exp FR_Two_N = GR_N_Biased // Form 2**N
(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
}
;;
//
// Branch and return if N is not an int.
// Main path, create 2**N
// Compute biased result exponent
// Branch if N is not an integer
//
{ .mib
add GR_exp_Result = GR_exp_X, GR_N_as_int
mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
(p9) br.cond.spnt SCALBF_N_NOT_INT
}
;;
//
// Raise Denormal operand flag with compare
// Do final operation
//
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.i 999
cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
{ .mfb
nop.m 0
(p7) frcpa.s0 f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
//
// Set denormal on denormal input x and denormal input N
//
{ .mfi
nop.m 999
(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
nop.i 0
};;
{ .mfi
nop.m 999
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
nop.i 999
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
(p9) br.cond.spnt SCALBF_UNDERFLOW // Branch if certain underflow
}
{ .mfi
nop.m 999
fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
nop.i 0
};;
;;
//
// Adjust 2**N if N was very small or very large
//
{ .mib
(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
(p7) br.ret.sptk b0 // Return from main path
}
;;
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
{ .bbb
(p6) br.cond.spnt SCALBF_OVERFLOW // Branch if certain overflow
(p8) br.cond.spnt SCALBF_POSSIBLE_OVERFLOW // Branch if possible overflow
(p9) br.cond.spnt SCALBF_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
{ .mlx
nop.m 999
movl GR_Scratch = 0x000000000003007F
};;
{ .mfi
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
movl GR_Scratch1= 0x000000000001007F
};;
;;
// Here if possible underflow.
// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
SCALBF_POSSIBLE_UNDERFLOW:
//
// Here if possible overflow.
// Resulting exponent: 0x1007e = exp_Result
SCALBF_POSSIBLE_OVERFLOW:
// Set up necessary status fields
//
@ -254,137 +269,150 @@ GLOBAL_IEEE754_ENTRY(scalbf)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
fsetc.s3 0x7F,0x41
nop.i 999
nop.i 0
}
{ .mfi
nop.m 999
mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
fsetc.s2 0x7F,0x42
nop.i 999
};;
nop.i 0
}
;;
//
// Do final operation
// Do final operation with s2 and s3
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
setf.exp FR_NBig = GR_neg_ov_limit
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 0
}
{ .mfi
nop.m 999
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
setf.exp FR_Big = GR_pos_ov_limit
fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
nop.i 0
}
;;
// Check for overflow or underflow.
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflow)
// S3 user supplied status + FZ + TD (Underflow)
//
//
// Restore s3
// Restore s2
//
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
nop.i 0
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
nop.i 0
}
;;
//
// Is the result zero?
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
nop.i 0
}
{ .mfi
addl GR_Tag = 55, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
}
;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
nop.m 999
nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
nop.i 0
}
;;
//
// Is result bigger than the allowed range?
// Branch out for underflow
//
{ .mfb
(p6) addl GR_Tag = 56, r0
nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt SCALBF_UNDERFLOW
};;
}
;;
//
// Branch out for overflow
//
{ .mbb
nop.m 0
{ .bbb
(p7) br.cond.spnt SCALBF_OVERFLOW
(p9) br.cond.spnt SCALBF_OVERFLOW
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
br.ret.sptk b0 // Return from main path.
}
;;
// Here if result overflows.
// Sets the overflow error tag and branches (br.cond, not br.call) to the
// shared error-support code, so control does not come back to this stub.
SCALBF_OVERFLOW:
{ .mib
// alloc operands after ar.pfs are: inputs, locals, outputs, rotating.
alloc r32=ar.pfs,3,0,4,0
addl GR_Tag = 55, r0 // Set error tag for overflow
br.cond.sptk __libm_error_region // Call error support for overflow
}
;;
// Here if result underflows.
// Sets the underflow error tag and branches (br.cond, not br.call) to the
// shared error-support code, so control does not come back to this stub.
SCALBF_UNDERFLOW:
{ .mib
// alloc operands after ar.pfs are: inputs, locals, outputs, rotating.
alloc r32=ar.pfs,3,0,4,0
addl GR_Tag = 56, r0 // Set error tag for underflow
br.cond.sptk __libm_error_region // Call error support for underflow
}
;;
SCALBF_NAN_INF_ZERO:
//
// Convert N to a fp integer
// Before entry, N has been converted to a fp integer in significand of
// FR_N_float_int
//
// Convert N_float_int to floating point value
//
{ .mfi
getf.sig GR_N_as_int = FR_N_float_int
fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
nop.i 0
}
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
nop.i 999
}
{ .mfi
nop.m 0
fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
}
;;
{ .mfi
nop.m 0
fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
shl GR_Scratch = GR_Scratch,63
};;
}
;;
{ .mfi
nop.m 0
fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf
nop.i 0
}
{ .mfi
nop.m 0
fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf
nop.i 0
};;
}
;;
//
// Either X or N is a NaN; return result and possibly raise invalid.
@ -393,12 +421,15 @@ SCALBF_NAN_INF_ZERO:
nop.m 0
(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p6) br.ret.spnt b0
};;
}
;;
{ .mfb
getf.sig GR_N_as_int = FR_N_float_int
nop.m 0
(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p7) br.ret.spnt b0
};;
}
;;
//
// If N + Inf do something special
@ -413,43 +444,38 @@ SCALBF_NAN_INF_ZERO:
nop.m 0
(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0
nop.i 0
};;
}
;;
//
// If N==-Inf,return x/(-N)
//
{ .mfb
nop.m 0
(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
cmp.ne p7,p0 = GR_N_as_int,GR_Scratch
(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
(p9) br.ret.spnt b0
};;
//
// Convert N_float_int to floating point value
//
{ .mfi
cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
}
;;
//
// Is N an integer.
//
{ .mfi
nop.m 0
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
}
;;
//
// If N not an int, return NaN and raise invalid.
//
{ .mfb
nop.m 0
(p7) frcpa.s0 FR_Result,p6 = f0,f0
(p7) frcpa.s0 FR_Result,p0 = f0,f0
(p7) br.ret.spnt b0
};;
}
;;
//
// Always return x in other path.
@ -458,13 +484,39 @@ SCALBF_NAN_INF_ZERO:
nop.m 0
fma.s.s0 FR_Result = FR_Floating_X,f1,f0
br.ret.sptk b0
};;
}
;;
// Here if n not int
// Return NaN and raise invalid.
// The NaN is produced by asking frcpa for 0/0 under the user status
// field s0; the predicate result is discarded by targeting p0.
SCALBF_N_NOT_INT:
{ .mfb
nop.m 0
frcpa.s0 FR_Result,p0 = f0,f0
br.ret.sptk b0 // Return to caller with the result in FR_Result
}
;;
// Here if n=unorm
// The main path extracted signexp and the integer conversion from the raw
// input FR_Floating_N; for an unnormalized n both are redone here from the
// normalized copy FR_Norm_N before rejoining at SCALBF_COMMON1.
SCALBF_N_UNORM:
{ .mfb
getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
br.cond.sptk SCALBF_COMMON1 // Return to main path
}
;;
// Here if x=unorm
// The main path read signexp from the raw input FR_Floating_X; for an
// unnormalized x it is re-read here from the normalized copy FR_Norm_X
// before rejoining at SCALBF_COMMON2, where the exponent of x is masked
// out of GR_signexp_X.
SCALBF_X_UNORM:
{ .mib
getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
nop.i 0
br.cond.sptk SCALBF_COMMON2 // Return to main path
}
;;
GLOBAL_IEEE754_END(scalbf)
__libm_error_region:
SCALBF_OVERFLOW:
SCALBF_UNDERFLOW:
LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
@ -517,9 +569,9 @@ SCALBF_UNDERFLOW:
// Get location of result on stack
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
nop.m 0
nop.i 0
};;
//

View File

@ -43,38 +43,60 @@
// 01/26/01 Scalb completely reworked and now standalone version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 08/06/03 Improved performance
//
// API
//==============================================================
// double-extended = scalbl (double-extended x, double-extended n)
// long double = scalbl (long double x, long double n)
// input floating point f8 and floating point f9
// output floating point f8
//
// int_type = 0 if int is 32 bits
// int_type = 1 if int is 64 bits
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
// Strategy:
// Compute biased exponent of result exp_Result = N + exp_X
// Break into ranges:
// exp_Result > 0x13ffe -> Certain overflow
// exp_Result = 0x13ffe -> Possible overflow
// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path)
// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow
// exp_Result < 0x0c001 - 63 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
FR_Result2 = f9
FR_Norm_N = f10
FR_Result3 = f11
FR_Norm_X = f12
FR_Result3 = f10
FR_Norm_X = f11
FR_Two_N = f12
FR_N_float_int = f13
FR_Two_N = f14
FR_Two_to_Big = f15
FR_Big = f6
FR_NBig = f7
FR_Norm_N = f14
GR_neg_ov_limit= r14
GR_big_exp = r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
GR_exp_Result = r18
GR_pos_ov_limit= r19
GR_exp_sure_ou = r19
GR_Bias = r20
GR_N_as_int = r21
GR_signexp_X = r22
GR_exp_X = r23
GR_exp_mask = r24
GR_max_exp = r25
GR_min_exp = r26
GR_min_den_exp = r27
GR_Scratch = r28
GR_signexp_N = r29
GR_exp_N = r30
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@ -89,163 +111,156 @@ GLOBAL_IEEE754_ENTRY(scalbl)
//
// Is x NAN, INF, ZERO, +-?
//
{ .mfi
alloc r32=ar.pfs,0,3,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch = 0x019C3F,r0
}
//
// Is y a NAN, INF, ZERO, +-?
//
{ .mfi
nop.m 999
fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Scratch1 = 0x063BF,r0
}
;;
//
// Convert N to a fp integer
// Normalize x
//
{ .mfi
nop.m 0
fnorm.s1 FR_Norm_N = FR_Floating_N
nop.i 999
}
{ .mfi
nop.m 999
fnorm.s1 FR_Norm_X = FR_Floating_X
nop.i 999
};;
//
// Create 2**big
// Create 2**-big
// Normalize x
// Branch on special values.
//
{ .mib
setf.exp FR_Big = GR_Scratch
nop.i 0
(p6) br.cond.spnt SCALBL_NAN_INF_ZERO
}
{ .mib
setf.exp FR_NBig = GR_Scratch1
nop.i 0
(p7) br.cond.spnt SCALBL_NAN_INF_ZERO
};;
//
// Convert N to a fp integer
// Create -35000
//
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
addl GR_NBig = -35000,r0
}
;;
//
// Put N in a GP register
// Convert N_float_int to floating point value
// Create 35000
// Build the exponent Bias
//
{ .mii
getf.sig GR_N_as_int = FR_N_float_int
shl GR_Scratch = GR_Scratch,63
addl GR_Big = 35000,r0
{ .mfi
getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
mov GR_Bias = 0x0ffff
}
{ .mfi
addl GR_Bias = 0x0FFFF,r0
fcvt.xf FR_N_float_int = FR_N_float_int
mov GR_Big = 35000 // If N this big then certain overflow
fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
nop.i 0
};;
}
;;
{ .mfi
getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
nop.i 0
}
//
// Normalize n
//
{ .mfi
mov GR_exp_mask = 0x1ffff // Exponent mask
fnorm.s1 FR_Norm_N = FR_Floating_N
nop.i 0
}
;;
//
// Catch those fp values that are beyond 2**64-1
// Is N > 35000
// Is N < -35000
// Is n NAN, INF, ZERO, +-?
//
{ .mfi
cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
nop.f 0
nop.i 0
mov GR_big_exp = 0x1003e // Exponent at which n is integer
fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
mov GR_max_exp = 0x13ffe // Exponent of maximum long double
}
//
// Normalize x
//
{ .mfb
nop.m 0
fnorm.s1 FR_Norm_X = FR_Floating_X
(p7) br.cond.spnt SCALBL_N_UNORM // Branch if n=unorm
}
;;
SCALBL_COMMON1:
// Main path continues. Also return here from n=unorm path.
// Handle special cases if x = Nan, Inf, Zero
{ .mfb
nop.m 0
fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
(p6) br.cond.spnt SCALBL_NAN_INF_ZERO
}
;;
// Handle special cases if n = Nan, Inf, Zero
{ .mfi
getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
}
{ .mfb
mov GR_min_exp = 0x0c001 // Exponent of minimum long double
fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
(p9) br.cond.spnt SCALBL_NAN_INF_ZERO
}
;;
{ .mmi
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
(p7) sub GR_Big = r0, GR_Big // Limit for N
nop.i 0
};;
}
;;
//
// Is N really an int, only for those non-int indefinites?
// Create exp bias.
//
{ .mfi
{ .mib
cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
(p8) br.cond.spnt SCALBL_X_UNORM // Branch if x=unorm
}
;;
SCALBL_COMMON2:
// Main path continues. Also return here from x=unorm path.
// Create biased exponent for 2**N
{ .mmi
(p6) mov GR_N_as_int = GR_Big // Limit N
;;
add GR_N_Biased = GR_Bias,GR_N_as_int
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
}
;;
{ .mfi
setf.exp FR_Two_N = GR_N_Biased // Form 2**N
(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
}
;;
//
// Branch and return if N is not an int.
// Main path, create 2**N
// Compute biased result exponent
// Branch if N is not an integer
//
{ .mib
add GR_exp_Result = GR_exp_X, GR_N_as_int
mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble
(p9) br.cond.spnt SCALBL_N_NOT_INT
}
;;
//
// Raise Denormal operand flag with compare
// Do final operation
//
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.i 999
cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
{ .mfb
nop.m 0
(p7) frcpa.s0 f8,p11 = f0,f0
(p7) br.ret.spnt b0
};;
//
// Set denormal on denormal input x and denormal input N
//
{ .mfi
nop.m 999
(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
nop.i 0
};;
{ .mfi
nop.m 999
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
nop.i 999
fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
(p9) br.cond.spnt SCALBL_UNDERFLOW // Branch if certain underflow
}
{ .mfi
nop.m 999
fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
nop.i 0
};;
;;
//
// Adjust 2**N if N was very small or very large
//
{ .mib
(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
(p7) br.ret.sptk b0 // Return from main path
}
;;
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
{ .bbb
(p6) br.cond.spnt SCALBL_OVERFLOW // Branch if certain overflow
(p8) br.cond.spnt SCALBL_POSSIBLE_OVERFLOW // Branch if possible overflow
(p9) br.cond.spnt SCALBL_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
{ .mlx
nop.m 999
movl GR_Scratch = 0x0000000000033FFF
};;
{ .mfi
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
}
{ .mlx
nop.m 999
movl GR_Scratch1= 0x0000000000013FFF
};;
;;
// Here if possible underflow.
// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001
SCALBL_POSSIBLE_UNDERFLOW:
//
// Here if possible overflow.
// Resulting exponent: 0x13ffe = exp_Result
SCALBL_POSSIBLE_OVERFLOW:
// Set up necessary status fields
//
@ -254,137 +269,150 @@ GLOBAL_IEEE754_ENTRY(scalbl)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow
fsetc.s3 0x7F,0x41
nop.i 999
nop.i 0
}
{ .mfi
nop.m 999
mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow
fsetc.s2 0x7F,0x42
nop.i 999
};;
nop.i 0
}
;;
//
// Do final operation
// Do final operation with s2 and s3
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
setf.exp FR_NBig = GR_neg_ov_limit
fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 0
}
{ .mfi
nop.m 999
fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
setf.exp FR_Big = GR_pos_ov_limit
fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
nop.i 0
}
;;
// Check for overflow or underflow.
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflow)
// S3 user supplied status + FZ + TD (Underflow)
//
//
// Restore s3
// Restore s2
//
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
nop.i 0
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
nop.i 0
}
;;
//
// Is the result zero?
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
nop.i 0
}
{ .mfi
addl GR_Tag = 51, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
}
;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
nop.m 999
nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
nop.i 0
}
;;
//
// Is result bigger than the allowed range?
// Branch out for underflow
//
{ .mfb
(p6) addl GR_Tag = 52, r0
nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt SCALBL_UNDERFLOW
};;
}
;;
//
// Branch out for overflow
//
{ .mbb
nop.m 0
{ .bbb
(p7) br.cond.spnt SCALBL_OVERFLOW
(p9) br.cond.spnt SCALBL_OVERFLOW
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
br.ret.sptk b0 // Return from main path.
}
;;
// Here if result overflows.
// Sets the overflow error tag and branches (br.cond, not br.call) to the
// shared error-support code, so control does not come back to this stub.
SCALBL_OVERFLOW:
{ .mib
// alloc operands after ar.pfs are: inputs, locals, outputs, rotating.
alloc r32=ar.pfs,3,0,4,0
addl GR_Tag = 51, r0 // Set error tag for overflow
br.cond.sptk __libm_error_region // Call error support for overflow
}
;;
// Here if result underflows.
// Sets the underflow error tag and branches (br.cond, not br.call) to the
// shared error-support code, so control does not come back to this stub.
SCALBL_UNDERFLOW:
{ .mib
// alloc operands after ar.pfs are: inputs, locals, outputs, rotating.
alloc r32=ar.pfs,3,0,4,0
addl GR_Tag = 52, r0 // Set error tag for underflow
br.cond.sptk __libm_error_region // Call error support for underflow
}
;;
SCALBL_NAN_INF_ZERO:
//
// Convert N to a fp integer
// Before entry, N has been converted to a fp integer in significand of
// FR_N_float_int
//
// Convert N_float_int to floating point value
//
{ .mfi
getf.sig GR_N_as_int = FR_N_float_int
fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
nop.i 0
}
{ .mfi
addl GR_Scratch = 1,r0
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
nop.i 999
}
{ .mfi
nop.m 0
fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
}
;;
{ .mfi
nop.m 0
fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
shl GR_Scratch = GR_Scratch,63
};;
}
;;
{ .mfi
nop.m 0
fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf
nop.i 0
}
{ .mfi
nop.m 0
fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf
nop.i 0
};;
}
;;
//
// Either X or N is a NaN; return result and possibly raise invalid.
@ -393,12 +421,15 @@ SCALBL_NAN_INF_ZERO:
nop.m 0
(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p6) br.ret.spnt b0
};;
}
;;
{ .mfb
getf.sig GR_N_as_int = FR_N_float_int
nop.m 0
(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p7) br.ret.spnt b0
};;
}
;;
//
// If N + Inf do something special
@ -413,43 +444,38 @@ SCALBL_NAN_INF_ZERO:
nop.m 0
(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0
nop.i 0
};;
}
;;
//
// If N==-Inf,return x/(-N)
//
{ .mfb
nop.m 0
(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
cmp.ne p7,p0 = GR_N_as_int,GR_Scratch
(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
(p9) br.ret.spnt b0
};;
//
// Convert N_float_int to floating point value
//
{ .mfi
cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
fcvt.xf FR_N_float_int = FR_N_float_int
nop.i 0
};;
}
;;
//
// Is N an integer.
//
{ .mfi
nop.m 0
(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
nop.i 0
};;
}
;;
//
// If N not an int, return NaN and raise invalid.
//
{ .mfb
nop.m 0
(p7) frcpa.s0 FR_Result,p6 = f0,f0
(p7) frcpa.s0 FR_Result,p0 = f0,f0
(p7) br.ret.spnt b0
};;
}
;;
//
// Always return x in other path.
@ -458,13 +484,39 @@ SCALBL_NAN_INF_ZERO:
nop.m 0
fma.s0 FR_Result = FR_Floating_X,f1,f0
br.ret.sptk b0
};;
}
;;
// Here if n not int
// Return NaN and raise invalid.
// The NaN is produced by asking frcpa for 0/0 under the user status
// field s0; the predicate result is discarded by targeting p0.
SCALBL_N_NOT_INT:
{ .mfb
nop.m 0
frcpa.s0 FR_Result,p0 = f0,f0
br.ret.sptk b0 // Return to caller with the result in FR_Result
}
;;
// Here if n=unorm
// The main path extracted signexp and the integer conversion from the raw
// input FR_Floating_N; for an unnormalized n both are redone here from the
// normalized copy FR_Norm_N before rejoining at SCALBL_COMMON1.
SCALBL_N_UNORM:
{ .mfb
getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
br.cond.sptk SCALBL_COMMON1 // Return to main path
}
;;
// Here if x=unorm
// The main path read signexp from the raw input FR_Floating_X; for an
// unnormalized x it is re-read here from the normalized copy FR_Norm_X
// before rejoining at SCALBL_COMMON2, where the exponent of x is masked
// out of GR_signexp_X.
SCALBL_X_UNORM:
{ .mib
getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
nop.i 0
br.cond.sptk SCALBL_COMMON2 // Return to main path
}
;;
GLOBAL_IEEE754_END(scalbl)
__libm_error_region:
SCALBL_OVERFLOW:
SCALBL_UNDERFLOW:
LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
@ -517,9 +569,9 @@ SCALBL_UNDERFLOW:
// Get location of result on stack
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
nop.m 0
nop.i 0
};;
//

View File

@ -850,6 +850,7 @@ SINH_UNORM:
GLOBAL_IEEE754_END(sinh)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -689,6 +689,7 @@ SINH_UNORM:
GLOBAL_IEEE754_END(sinhf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -1055,6 +1055,7 @@ SINH_HUGE:
GLOBAL_IEEE754_END(sinhl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

View File

@ -252,6 +252,7 @@ GLOBAL_IEEE754_ENTRY(sqrt)
}
// END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
GLOBAL_IEEE754_END(sqrt)
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
// sp -> + psp -> + psp -> + sp -> +

View File

@ -204,6 +204,7 @@ GLOBAL_IEEE754_ENTRY(sqrtf)
//
GLOBAL_IEEE754_END(sqrtf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii

View File

@ -221,6 +221,7 @@ alloc r32= ar.pfs,0,5,4,0
// END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
GLOBAL_IEEE754_END(sqrtl)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -16,8 +16,18 @@ import_c() {
echo "$1 $libm_dir/$2 $3"
}
import_c DUMMY libm_support.h libm_support.h
import_c DUMMY libm_error.c libm_error.c
# Support files that are not tied to a single math function.  Each one is
# listed with the placeholder function pattern DUMMY and keeps the same
# file name on the source and destination side.
dummy_files="
libm_cpu_defs.h
libm_error_codes.h
libm_support.h
libm_error.c
"
for f in $dummy_files
do
import_c DUMMY $f $f
done
import_c scalblnf scalblnf.c s_scalblnf.c
for f in acos acosh asin atanh cosh exp2 exp10 fmod log2 pow remainder \

View File

@ -7,9 +7,12 @@ BEGIN {
getline;
while (!match($0, "^// WARRANTY DISCLAIMER")) {
print;
getline;
if (!getline) {
break;
}
getline;
}
if (getline)
{
printf \
"// Redistribution and use in source and binary forms, with or without\n" \
"// modification, are permitted provided that the following conditions are\n" \
@ -31,6 +34,7 @@ BEGIN {
} while (getline);
}
}
}
/^[.]data/ {
print "RODATA";
@ -115,7 +119,6 @@ BEGIN {
print
getline;
}
getline;
printf "%s_END(%s)\n", type, name;
if (match(name, "^exp10[fl]?$")) {
t=substr(name,6)

View File

@ -16,6 +16,7 @@ import_s() {
# $2 = source file-name
# $3 = destination file-name
echo "Importing $1 from $2 -> $3"
rm -f $3
awk -f import_file.awk FUNC=$1 $2 > $3
}
@ -24,19 +25,82 @@ import_c() {
# $2 = source file-name
# $3 = destination file-name
echo "Importing $1 from $2 -> $3"
rm -f $3
awk -f import_file.awk LICENSE_ONLY=y $2 > $3
}
do_imports() {
while read func_pattern src_file dst_file; do
if [ "$(expr $src_file : '.*\(c\)$')" = "c" ]; then
case $src_file in
*.[ch])
import_c "$func_pattern" "$src_file" "$dst_file"
else
;;
*)
import_s "$func_pattern" "$src_file" "$dst_file"
fi
;;
esac
done
}
./gen_import_file_list $libm_dir > import_file_list
do_imports < import_file_list
# Files that are (re)written as one-line placeholders whose only content is
# the C comment "/* Not needed. */".
emptyfiles="
e_gamma_r.c
e_gammaf_r.c
e_gammal_r.c
s_sincos.c
s_sincosf.c
s_sincosl.c
t_exp.c
w_acosh.c
w_acoshf.c
w_acoshl.c
w_atanh.c
w_atanhf.c
w_atanhl.c
w_exp10.c
w_exp10f.c
w_exp10l.c
w_exp2.c
w_exp2f.c
w_exp2l.c
w_expl.c
w_lgamma_r.c
w_lgammaf_r.c
w_lgammal_r.c
w_log2.c
w_log2f.c
w_log2l.c
w_sinh.c
w_sinhf.c
w_sinhl.c
"
# $emptyfiles is unquoted so it word-splits into individual file names.
# Each file is removed first (rm -f ignores a missing file) and then
# recreated with the placeholder comment.
for f in $emptyfiles
do
rm -f $f
echo "/* Not needed. */" > $f
done
# Files that are deleted outright rather than replaced by a placeholder.
removedfiles="
libm_atan2_reg.S
s_ldexp.S
s_ldexpf.S
s_ldexpl.S
s_scalbn.S
s_scalbnf.S
s_scalbnl.S
"
# Unquoted expansion passes every listed name as a separate argument;
# -f keeps rm quiet when a file is already absent.
rm -f $removedfiles
# Import the reentrant lgamma C sources from $libm_dir; the destination
# name is the source name prefixed with "e_".
for f in lgammaf_r.c lgammal_r.c lgamma_r.c
do
import_c $f $libm_dir/$f e_$f
done
# Import the lgamma wrapper C sources from $libm_dir; the destination
# name is the source name prefixed with "w_".
for f in lgamma.c lgammaf.c lgammal.c
do
import_c $f $libm_dir/$f w_$f
done

View File

@ -1,7 +1,7 @@
/* file: libm_error.c */
// Copyright (c) 2000 - 2003, Intel Corporation
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -69,7 +69,22 @@
// Added code for tgamma
// 04/11/03: Corrected POSIX/SVID/XOPEN branches for gamma/lgamma
// to return EDOM for neg ints.
//
// 09/08/03: Corrected XOPEN/SVID result for pow overflow with neg x, pos y.
// 10/14/03: Added ILP32 ifdef
// 12/12/03: Corrected XOPEN/SVID results for powf_zero_to_negative,
// powl_neg_to_non_integer, atan2f_zero, atan2df_zero,
// acoshf_lt_one, acosh_lt_one.
// 12/07/04: Cast name strings as char *.
// 12/08/04: Corrected POSIX behavior for atan2_zero, acos_gt_one, asin_gt_one,
// log_negative, log10_negative, log1p_negative, and log2_negative.
// Added SVID and XOPEN case log2l_zero.
// 12/13/04: Corrected POSIX behavior for exp2_overflow, exp2_underflow,
// exp10_overflow, exp10_underflow. Added ISOC to set errno for
// exp10_underflow.
// 12/14/04: Corrected POSIX behavior for nextafter_overflow,
// nextafter_underflow, nexttoward_overflow, nexttoward_underflow.
// Added ISOC to set errno for nextafter and nexttoward underflow.
// 12/15/04: Corrected POSIX behavior for exp, exp2, and exp10 underflow.
#include <errno.h>
#include <stdio.h>
@ -127,7 +142,7 @@ struct exception exc;
struct exceptionf excf;
struct exceptionl excl;
# if defined(__GNUC__)
# ifdef __GNUC__
#define ALIGNIT __attribute__ ((__aligned__ (16)))
# elif defined opensource
#define ALIGNIT
@ -138,47 +153,55 @@ struct exceptionl excl;
# ifdef SIZE_LONG_INT_64
#define __INT_64__ signed long
# else
# if ILP32
#define __INT_64__ signed long long
# else
#define __INT_64__ __int64
# endif
# endif
const char float_inf[4] = {0x00,0x00,0x80,0x7F};
const char float_huge[4] = {0xFF,0xFF,0x7F,0x7F};
const char float_zero[4] = {0x00,0x00,0x00,0x00};
const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF};
const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF};
const char float_neg_zero[4] = {0x00,0x00,0x00,0x80};
#define STATIC static
STATIC const char float_inf[4] = {0x00,0x00,0x80,0x7F};
STATIC const char float_huge[4] = {0xFF,0xFF,0x7F,0x7F};
STATIC const char float_zero[4] = {0x00,0x00,0x00,0x00};
STATIC const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF};
STATIC const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF};
STATIC const char float_neg_zero[4] = {0x00,0x00,0x00,0x80};
ALIGNIT
const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F};
#if 0 /* unused */
STATIC const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F};
#ifndef _LIBC
ALIGNIT
const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
STATIC const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
#endif
ALIGNIT
const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
STATIC const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF};
#if 0 /* unused */
STATIC const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF};
#ifndef _LIBC
ALIGNIT
const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
STATIC const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
#endif
ALIGNIT
const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
STATIC const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
ALIGNIT
const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
#if 0 /* unused */
STATIC const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
#ifndef _LIBC
STATIC const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
#endif
ALIGNIT
const char long_double_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
STATIC const char long_double_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
#if 0 /* unused */
STATIC const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
#ifndef _LIBC
STATIC const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
#endif
ALIGNIT
const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
STATIC const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
#define RETVAL_HUGE_VALL *(long double *)retval = *(long double *)long_double_inf
#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf
@ -195,6 +218,10 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define RETVAL_HUGEF *(float *)retval = *(float *) float_huge
#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge
#define ZEROL_VALUE *(long double *)long_double_zero
#define ZEROD_VALUE *(double *)double_zero
#define ZEROF_VALUE *(float *)float_zero
#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero
#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero
#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero
@ -254,6 +281,7 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define INPUT_RESL (*(long double *)retval)
#define INPUT_RESD (*(double *)retval)
#define INPUT_RESF (*(float *)retval)
#define INPUT_RESI64 (*(__INT_64__ *)retval)
#define WRITEL_LOG_ZERO fputs("logl: SING error\n",stderr)
#define WRITED_LOG_ZERO fputs("log: SING error\n",stderr)
@ -271,7 +299,7 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define WRITED_Y1_ZERO fputs("y1: DOMAIN error\n",stderr)
#define WRITEF_Y1_ZERO fputs("y1f: DOMAIN error\n",stderr)
#define WRITEL_Y1_NEGATIVE fputs("y1l: DOMAIN error\n",stderr)
#define WRITED_Y1_NEGATIUE fputs("y1: DOMAIN error\n",stderr)
#define WRITED_Y1_NEGATIVE fputs("y1: DOMAIN error\n",stderr)
#define WRITEF_Y1_NEGATIVE fputs("y1f: DOMAIN error\n",stderr)
#define WRITEL_YN_ZERO fputs("ynl: DOMAIN error\n",stderr)
#define WRITED_YN_ZERO fputs("yn: DOMAIN error\n",stderr)
@ -339,9 +367,9 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define WRITEL_GAMMA_NEGATIVE fputs("gammal: SING error\n",stderr)
#define WRITED_GAMMA_NEGATIVE fputs("gamma: SING error\n",stderr)
#define WRITEF_GAMMA_NEGATIVE fputs("gammaf: SING error\n",stderr)
#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: DOMAIN error\n",stderr)
#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: DOMAIN error\n",stderr)
#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: DOMAIN error\n",stderr)
#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: SING error\n",stderr)
#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: SING error\n",stderr)
#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: SING error\n",stderr)
#define WRITEL_J0_TLOSS fputs("j0l: TLOSS error\n",stderr)
#define WRITEL_Y0_TLOSS fputs("y0l: TLOSS error\n",stderr)
#define WRITEL_J1_TLOSS fputs("j1l: TLOSS error\n",stderr)
@ -428,9 +456,15 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case nextafterl_overflow:
case nextafter_overflow:
case nextafterf_overflow:
case nextafterl_underflow:
case nextafter_underflow:
case nextafterf_underflow:
case nexttowardl_overflow:
case nexttoward_overflow:
case nexttowardf_overflow:
case nexttowardl_underflow:
case nexttoward_underflow:
case nexttowardf_underflow:
case scalbnl_overflow:
case scalbn_overflow:
case scalbnf_overflow:
@ -496,6 +530,7 @@ else if(_LIB_VERSIONIMF==_ISOC_)
}
case powl_underflow:
case expl_underflow:
case exp10l_underflow:
case exp2l_underflow:
case scalbl_underflow:
case scalbnl_underflow:
@ -505,11 +540,14 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case annuityl_underflow:
case compoundl_underflow:
{
if ( *(__INT_64__*)retval == 0 ) ERRNO_RANGE;
/* Test for zero by testing 64 significand bits for zero. An integer
test is needed so denormal flag is not set by a floating-point test */
if ( INPUT_RESI64 == 0 ) ERRNO_RANGE;
break;
}
case pow_underflow:
case exp_underflow:
case exp10_underflow:
case exp2_underflow:
case scalb_underflow:
case scalbn_underflow:
@ -519,11 +557,14 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case annuity_underflow:
case compound_underflow:
{
if ( ((*(__INT_64__*)retval)<<1) == 0 ) ERRNO_RANGE;
/* Test for zero by testing exp and significand bits for zero. An integer
test is needed so denormal flag is not set by a floating-point test */
if ( (INPUT_RESI64 << 1) == 0 ) ERRNO_RANGE;
break;
}
case powf_underflow:
case expf_underflow:
case exp10f_underflow:
case exp2f_underflow:
case scalbf_underflow:
case scalbnf_underflow:
@ -533,7 +574,9 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case annuityf_underflow:
case compoundf_underflow:
{
if ( ((*(__INT_64__*)retval)<<33) == 0 ) ERRNO_RANGE;
/* Test for zero by testing exp and significand bits for zero. An integer
test is needed so denormal flag is not set by a floating-point test */
if ( (INPUT_RESI64 << 33) == 0 ) ERRNO_RANGE;
break;
}
case logl_negative:
@ -656,10 +699,10 @@ switch(input_tag)
RETVAL_HUGE_VALF; ERRNO_RANGE; break;
}
case gammal_negative:
case lgammal_negative:
case gamma_negative:
case lgamma_negative:
case gammaf_negative:
case lgammal_negative:
case lgamma_negative:
case lgammaf_negative:
case tgammal_negative:
case tgamma_negative:
@ -697,6 +740,18 @@ switch(input_tag)
case sinhcoshl_overflow:
case sinhcosh_overflow:
case sinhcoshf_overflow:
case nextafterl_overflow:
case nextafter_overflow:
case nextafterf_overflow:
case nextafterl_underflow:
case nextafter_underflow:
case nextafterf_underflow:
case nexttowardl_overflow:
case nexttoward_overflow:
case nexttowardf_overflow:
case nexttowardl_underflow:
case nexttoward_underflow:
case nexttowardf_underflow:
{
ERRNO_RANGE; break;
}
@ -767,7 +822,10 @@ switch(input_tag)
/* y1l(x < 0) */
/* ynl(x < 0) */
{
RETVAL_NEG_HUGE_VALL; ERRNO_DOMAIN; break;
#ifndef _LIBC
RETVAL_NEG_HUGE_VALL;
#endif
ERRNO_DOMAIN; break;
}
case y0_negative:
case y1_negative:
@ -792,8 +850,9 @@ switch(input_tag)
case log10l_zero:
case log2l_zero:
/* logl(0) */
/* log1pl(0) */
/* log1pl(-1) */
/* log10l(0) */
/* log2l(0) */
{
RETVAL_NEG_HUGE_VALL; ERRNO_RANGE; break;
}
@ -802,8 +861,9 @@ switch(input_tag)
case log10_zero:
case log2_zero:
/* log(0) */
/* log1p(0) */
/* log1p(-1) */
/* log10(0) */
/* log2(0) */
{
RETVAL_NEG_HUGE_VALD; ERRNO_RANGE; break;
}
@ -812,8 +872,9 @@ switch(input_tag)
case log10f_zero:
case log2f_zero:
/* logf(0) */
/* log1pf(0) */
/* log1pf(-1) */
/* log10f(0) */
/* log2f(0) */
{
RETVAL_NEG_HUGE_VALF; ERRNO_RANGE; break;
}
@ -822,12 +883,10 @@ switch(input_tag)
case log10l_negative:
case log2l_negative:
/* logl(x < 0) */
/* log1pl(x < 0) */
/* log1pl(x < -1) */
/* log10l(x < 0) */
/* log2l(x < 0) */
{
#ifndef _LIBC
RETVAL_NEG_HUGE_VALL;
#endif
ERRNO_DOMAIN; break;
}
case log_negative:
@ -835,12 +894,10 @@ switch(input_tag)
case log10_negative:
case log2_negative:
/* log(x < 0) */
/* log1p(x < 0) */
/* log1p(x < -1) */
/* log10(x < 0) */
/* log2(x < 0) */
{
#ifndef _LIBC
RETVAL_NEG_HUGE_VALD;
#endif
ERRNO_DOMAIN; break;
}
case logf_negative:
@ -848,52 +905,63 @@ switch(input_tag)
case log10f_negative:
case log2f_negative:
/* logf(x < 0) */
/* log1pf(x < 0) */
/* log1pf(x < -1) */
/* log10f(x < 0) */
/* log2f(x < 0) */
{
#ifndef _LIBC
RETVAL_NEG_HUGE_VALF;
#endif
ERRNO_DOMAIN; break;
}
case expl_overflow:
case exp2l_overflow:
case exp10l_overflow:
case exp2l_overflow:
/* expl overflow */
/* exp10l overflow */
/* exp2l overflow */
{
RETVAL_HUGE_VALL; ERRNO_RANGE; break;
}
case exp_overflow:
case exp2_overflow:
case exp10_overflow:
case exp2_overflow:
/* exp overflow */
/* exp10 overflow */
/* exp2 overflow */
{
RETVAL_HUGE_VALD; ERRNO_RANGE; break;
}
case expf_overflow:
case exp2f_overflow:
case exp10f_overflow:
case exp2f_overflow:
/* expf overflow */
{
RETVAL_HUGE_VALF; ERRNO_RANGE; break;
}
case expl_underflow:
case exp10l_underflow:
case exp2l_underflow:
/* expl underflow */
/* exp10l underflow */
/* exp2l underflow */
{
RETVAL_ZEROL; ERRNO_RANGE; break;
ERRNO_RANGE; break;
}
case exp_underflow:
case exp10_underflow:
case exp2_underflow:
/* exp underflow */
/* exp10 underflow */
/* exp2 underflow */
{
RETVAL_ZEROD; ERRNO_RANGE; break;
ERRNO_RANGE; break;
}
case expf_underflow:
case exp10f_underflow:
case exp2f_underflow:
/* expf underflow */
/* exp10f underflow */
/* exp2f underflow */
{
RETVAL_ZEROF; ERRNO_RANGE; break;
ERRNO_RANGE; break;
}
case j0l_gt_loss:
case y0l_gt_loss:
@ -945,7 +1013,7 @@ switch(input_tag)
case compoundl_overflow:
/* powl(x,y) overflow */
{
if (INPUT_RESL < 0) RETVAL_NEG_HUGE_VALL;
if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
ERRNO_RANGE; break;
}
@ -954,7 +1022,7 @@ switch(input_tag)
case compound_overflow:
/* pow(x,y) overflow */
{
if (INPUT_RESD < 0) RETVAL_NEG_HUGE_VALD;
if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
ERRNO_RANGE; break;
}
@ -963,7 +1031,7 @@ switch(input_tag)
case compoundf_overflow:
/* powf(x,y) overflow */
{
if (INPUT_RESF < 0) RETVAL_NEG_HUGE_VALF;
if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
ERRNO_RANGE; break;
}
@ -1051,51 +1119,24 @@ switch(input_tag)
}
case atan2l_zero:
case atan2dl_zero:
/* atan2l(0,0) */
/* atan2dl(0,0) */
{
#ifndef _LIBC
RETVAL_ZEROL;
#else
/* XXX arg1 and arg2 are switched!!!! */
if (signbit (*(long double *) arg1))
/* y == -0 */
*(long double *) retval = __libm_copysignl (M_PIl, *(long double *) arg2);
else
*(long double *) retval = *(long double *) arg2;
#endif
ERRNO_DOMAIN; break;
break;
}
case atan2_zero:
case atan2d_zero:
/* atan2(0,0) */
/* atan2d(0,0) */
{
#ifndef _LIBC
RETVAL_ZEROD;
#else
/* XXX arg1 and arg2 are switched!!!! */
if (signbit (*(double *) arg1))
/* y == -0 */
*(double *) retval = __libm_copysign (M_PI, *(double *) arg2);
else
*(double *) retval = *(double *) arg2;
#endif
ERRNO_DOMAIN; break;
break;
}
case atan2f_zero:
case atan2df_zero:
/* atan2f(0,0) */
/* atan2df(0,0) */
{
#ifndef _LIBC
RETVAL_ZEROF;
#else
if (signbit (*(float *) arg2))
/* y == -0 */
*(float *) retval = __libm_copysignf (M_PI, *(float *) arg1);
else
*(float *) retval = *(float *) arg1;
#endif
ERRNO_DOMAIN; break;
break;
}
case expm1l_overflow:
/* expm1 overflow */
@ -1145,42 +1186,42 @@ switch(input_tag)
case scalbl_underflow:
/* scalbl underflow */
{
if (INPUT_XL < 0) RETVAL_NEG_ZEROL;
if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_ZEROL;
else RETVAL_ZEROL;
ERRNO_RANGE; break;
}
case scalb_underflow:
/* scalb underflow */
{
if (INPUT_XD < 0) RETVAL_NEG_ZEROD;
if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_ZEROD;
else RETVAL_ZEROD;
ERRNO_RANGE; break;
}
case scalbf_underflow:
/* scalbf underflow */
{
if (INPUT_XF < 0) RETVAL_NEG_ZEROF;
if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_ZEROF;
else RETVAL_ZEROF;
ERRNO_RANGE; break;
}
case scalbl_overflow:
/* scalbl overflow */
{
if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
ERRNO_RANGE; break;
}
case scalb_overflow:
/* scalb overflow */
{
if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
ERRNO_RANGE; break;
}
case scalbf_overflow:
/* scalbf overflow */
{
if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
ERRNO_RANGE; break;
}
@ -1204,9 +1245,6 @@ switch(input_tag)
/* acosl(x > 1) */
/* acosdl(x > 1) */
{
#ifndef _LIBC
RETVAL_ZEROL;
#endif
ERRNO_DOMAIN; break;
}
case acos_gt_one:
@ -1214,9 +1252,6 @@ switch(input_tag)
/* acos(x > 1) */
/* acosd(x > 1) */
{
#ifndef _LIBC
RETVAL_ZEROD;
#endif
ERRNO_DOMAIN; break;
}
case acosf_gt_one:
@ -1224,9 +1259,6 @@ switch(input_tag)
/* acosf(x > 1) */
/* acosdf(x > 1) */
{
#ifndef _LIBC
RETVAL_ZEROF;
#endif
ERRNO_DOMAIN; break;
}
case asinl_gt_one:
@ -1234,9 +1266,6 @@ switch(input_tag)
/* asinl(x > 1) */
/* asindl(x > 1) */
{
#ifndef _LIBC
RETVAL_ZEROL;
#endif
ERRNO_DOMAIN; break;
}
case asin_gt_one:
@ -1244,18 +1273,13 @@ switch(input_tag)
/* asin(x > 1) */
/* asind(x > 1) */
{
#ifndef _LIBC
RETVAL_ZEROD;
#endif
ERRNO_DOMAIN; break;
}
case asinf_gt_one:
case asindf_gt_one:
/* asinf(x > 1) */
/* asindf(x > 1) */
{
#ifndef _LIBC
RETVAL_ZEROF;
#endif
ERRNO_DOMAIN; break;
}
case remainderl_by_zero:
@ -1291,33 +1315,24 @@ switch(input_tag)
{
RETVAL_HUGE_VALF; ERRNO_RANGE; break;
}
case nextafterl_overflow:
case nextafter_overflow:
case nextafterf_overflow:
case nexttowardl_overflow:
case nexttoward_overflow:
case nexttowardf_overflow:
{
ERRNO_RANGE; break;
}
case sinhl_overflow:
/* sinhl overflows */
{
if (INPUT_XL > 0) RETVAL_HUGE_VALL;
if (INPUT_XL > ZEROL_VALUE /*0*/) RETVAL_HUGE_VALL;
else RETVAL_NEG_HUGE_VALL;
ERRNO_RANGE; break;
}
case sinh_overflow:
/* sinh overflows */
{
if (INPUT_XD > 0) RETVAL_HUGE_VALD;
if (INPUT_XD > ZEROD_VALUE /*0*/) RETVAL_HUGE_VALD;
else RETVAL_NEG_HUGE_VALD;
ERRNO_RANGE; break;
}
case sinhf_overflow:
/* sinhf overflows */
{
if (INPUT_XF > 0) RETVAL_HUGE_VALF;
if (INPUT_XF > ZEROF_VALUE /*0*/) RETVAL_HUGE_VALF;
else RETVAL_NEG_HUGE_VALF;
ERRNO_RANGE; break;
}
@ -1862,6 +1877,27 @@ else
*(float *)retval = excf.retval;
break;
}
case log2l_zero:
/* log2l(0) */
{
SINGL; NAMEL = (char *) "log2l";
ifSVID
{
RETVAL_NEG_HUGEL;
NOT_MATHERRL
{
WRITEL_LOG2_ZERO;
ERRNO_DOMAIN;
}
}
else
{
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
*(long double *)retval = excl.retval;
break;
}
case log2_zero:
/* log2(0) */
{
@ -2096,12 +2132,12 @@ else
OVERFLOWL; NAMEL = (char *) "powl";
ifSVID
{
if (INPUT_XL < 0) RETVAL_NEG_HUGEL;
if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGEL;
else RETVAL_HUGEL;
}
else
{
if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
}
NOT_MATHERRL {ERRNO_RANGE;}
@ -2114,12 +2150,12 @@ else
OVERFLOWD; NAMED = (char *) "pow";
ifSVID
{
if (INPUT_XD < 0) RETVAL_NEG_HUGED;
if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGED;
else RETVAL_HUGED;
}
else
{
if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
}
NOT_MATHERRD {ERRNO_RANGE;}
@ -2132,12 +2168,12 @@ else
OVERFLOWF; NAMEF = (char *) "powf";
ifSVID
{
if (INPUT_XF < 0) RETVAL_NEG_HUGEF;
if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGEF;
else RETVAL_HUGEF;
}
else
{
if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
}
NOT_MATHERRF {ERRNO_RANGE;}
@ -2214,7 +2250,6 @@ else
/* 0**neg */
{
DOMAINF; NAMEF = (char *) "powf";
RETVAL_NEG_HUGE_VALF;
ifSVID
{
RETVAL_ZEROF;
@ -2238,7 +2273,7 @@ else
DOMAINL; NAMEL = (char *) "powl";
ifSVID
{
RETVAL_ZEROF;
RETVAL_ZEROL;
NOT_MATHERRL
{
WRITEL_POW_NEG_TO_NON_INTEGER;
@ -2360,11 +2395,13 @@ else
DOMAINF; NAMEF = (char *) "atan2f";
RETVAL_ZEROF;
NOT_MATHERRF
{
ifSVID
{
WRITEF_ATAN2_ZERO_BY_ZERO;
}
ERRNO_DOMAIN;
}
*(float *)retval = excf.retval;
break;
}
@ -2406,11 +2443,13 @@ else
DOMAINF; NAMEF = (char *) "atan2df";
RETVAL_ZEROF;
NOT_MATHERRF
{
ifSVID
{
WRITEF_ATAN2D_ZERO_BY_ZERO;
}
ERRNO_DOMAIN;
}
*(float *)retval = excf.retval;
break;
}
@ -2446,7 +2485,7 @@ else
/* scalbl underflow */
{
UNDERFLOWL; NAMEL = (char *) "scalbl";
if (INPUT_XL < 0.0L) RETVAL_NEG_ZEROL;
if (INPUT_XL < ZEROL_VALUE /*0.0L*/) RETVAL_NEG_ZEROL;
else RETVAL_ZEROL;
NOT_MATHERRL {ERRNO_RANGE;}
*(long double *)retval = excl.retval;
@ -2456,7 +2495,7 @@ else
/* scalb underflow */
{
UNDERFLOWD; NAMED = (char *) "scalb";
if (INPUT_XD < 0.0) RETVAL_NEG_ZEROD;
if (INPUT_XD < ZEROD_VALUE /*0.0*/) RETVAL_NEG_ZEROD;
else RETVAL_ZEROD;
NOT_MATHERRD {ERRNO_RANGE;}
*(double *)retval = exc.retval;
@ -2466,7 +2505,7 @@ else
/* scalbf underflow */
{
UNDERFLOWF; NAMEF = (char *) "scalbf";
if (INPUT_XF < 0.0) RETVAL_NEG_ZEROF;
if (INPUT_XF < ZEROF_VALUE /*0.0*/) RETVAL_NEG_ZEROF;
else RETVAL_ZEROF;
NOT_MATHERRF {ERRNO_RANGE;}
*(float *)retval = excf.retval;
@ -2476,7 +2515,7 @@ else
/* scalbl overflow */
{
OVERFLOWL; NAMEL = (char *) "scalbl";
if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
NOT_MATHERRL {ERRNO_RANGE;}
*(long double *)retval = excl.retval;
@ -2486,7 +2525,7 @@ else
/* scalb overflow */
{
OVERFLOWD; NAMED = (char *) "scalb";
if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
NOT_MATHERRD {ERRNO_RANGE;}
*(double *)retval = exc.retval;
@ -2496,7 +2535,7 @@ else
/* scalbf overflow */
{
OVERFLOWF; NAMEF = (char *) "scalbf";
if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
NOT_MATHERRF {ERRNO_RANGE;}
*(float *)retval = excf.retval;
@ -2844,12 +2883,12 @@ else
OVERFLOWL; NAMEL = (char *) "sinhl";
ifSVID
{
if (INPUT_XL > 0.0) RETVAL_HUGEL;
if (INPUT_XL > ZEROL_VALUE /*0.0*/) RETVAL_HUGEL;
else RETVAL_NEG_HUGEL;
}
else
{
if (INPUT_XL > 0.0) RETVAL_HUGE_VALL;
if (INPUT_XL > ZEROL_VALUE /*0.0*/) RETVAL_HUGE_VALL;
else RETVAL_NEG_HUGE_VALL;
}
NOT_MATHERRL {ERRNO_RANGE;}
@ -2862,12 +2901,12 @@ else
OVERFLOWD; NAMED = (char *) "sinh";
ifSVID
{
if (INPUT_XD > 0.0) RETVAL_HUGED;
if (INPUT_XD > ZEROD_VALUE /*0.0*/) RETVAL_HUGED;
else RETVAL_NEG_HUGED;
}
else
{
if (INPUT_XD > 0.0) RETVAL_HUGE_VALD;
if (INPUT_XD > ZEROD_VALUE /*0.0*/) RETVAL_HUGE_VALD;
else RETVAL_NEG_HUGE_VALD;
}
NOT_MATHERRD {ERRNO_RANGE;}
@ -2880,12 +2919,12 @@ else
OVERFLOWF; NAMEF = (char *) "sinhf";
ifSVID
{
if( INPUT_XF > 0.0) RETVAL_HUGEF;
if (INPUT_XF > ZEROF_VALUE /*0.0*/) RETVAL_HUGEF;
else RETVAL_NEG_HUGEF;
}
else
{
if (INPUT_XF > 0.0) RETVAL_HUGE_VALF;
if (INPUT_XF > ZEROF_VALUE /*0.0*/) RETVAL_HUGE_VALF;
else RETVAL_NEG_HUGE_VALF;
}
NOT_MATHERRF {ERRNO_RANGE;}
@ -2919,7 +2958,7 @@ else
{
NOT_MATHERRD
{
WRITEL_ACOSH;
WRITED_ACOSH;
ERRNO_DOMAIN;
}
}
@ -2947,7 +2986,7 @@ else
NOT_MATHERRF {ERRNO_DOMAIN;}
}
*(float *)retval = excf.retval;
ERRNO_DOMAIN; break;
break;
}
case atanhl_gt_one:
/* atanhl(|x| > 1) */
@ -3225,7 +3264,7 @@ else
RETVAL_HUGEL;
NOT_MATHERRL
{
WRITEL_GAMMA_NEGATIVE;
WRITEL_LGAMMA_NEGATIVE;
ERRNO_DOMAIN;
}
}
@ -3801,7 +3840,8 @@ else
WRITEF_Y1_ZERO;
ERRNO_DOMAIN;
}
}else
}
else
{
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
@ -4025,7 +4065,7 @@ else
RETVAL_NEG_HUGED;
NOT_MATHERRD
{
WRITED_Y1_NEGATIUE;
WRITED_Y1_NEGATIVE;
ERRNO_DOMAIN;
}
}

View File

@ -47,6 +47,7 @@
// 09/15/02 Fixed bug on the branch lgamma_negrecursion
// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 07/22/03 Reformatted some data tables
//
//*********************************************************************
//
@ -951,19 +952,32 @@ data8 0xD28D3312983E98A0,0xBFFF //S2
//
data8 0x8090F777D7942F73,0x4001 // PR01
data8 0xE5B521193CF61E63,0x4000 // PR11
data8 0xC02C000000001939,0x0000000000000233 // (-15;-14)
data8 0xC02A000000016124,0x0000000000002BFB // (-14;-13)
data8 0xC02800000011EED9,0x0000000000025CBB // (-13;-12)
data8 0xC026000000D7322A,0x00000000001E1095 // (-12;-11)
data8 0xC0240000093F2777,0x00000000013DD3DC // (-11;-10)
data8 0xC02200005C7768FB,0x000000000C9539B9 // (-10;-9)
data8 0xC02000034028B3F9,0x000000007570C565 // (-9;-8)
data8 0xC01C0033FDEDFE1F,0x00000007357E670E // (-8;-7)
data8 0xC018016B25897C8D,0x000000346DC5D639 // (-7;-6)
data8 0xC014086A57F0B6D9,0x0000010624DD2F1B // (-6;-5)
data8 0xC010284E78599581,0x0000051EB851EB85 // (-5;-4)
data8 0xC009260DBC9E59AF,0x000028F5C28F5C29 // (-4;-3)
data8 0xC003A7FC9600F86C,0x0000666666666666 // (-3;-2)
data8 0xC02C000000001939 // (-15;-14)
data8 0x0000000000000233 // (-15;-14)
data8 0xC02A000000016124 // (-14;-13)
data8 0x0000000000002BFB // (-14;-13)
data8 0xC02800000011EED9 // (-13;-12)
data8 0x0000000000025CBB // (-13;-12)
data8 0xC026000000D7322A // (-12;-11)
data8 0x00000000001E1095 // (-12;-11)
data8 0xC0240000093F2777 // (-11;-10)
data8 0x00000000013DD3DC // (-11;-10)
data8 0xC02200005C7768FB // (-10;-9)
data8 0x000000000C9539B9 // (-10;-9)
data8 0xC02000034028B3F9 // (-9;-8)
data8 0x000000007570C565 // (-9;-8)
data8 0xC01C0033FDEDFE1F // (-8;-7)
data8 0x00000007357E670E // (-8;-7)
data8 0xC018016B25897C8D // (-7;-6)
data8 0x000000346DC5D639 // (-7;-6)
data8 0xC014086A57F0B6D9 // (-6;-5)
data8 0x0000010624DD2F1B // (-6;-5)
data8 0xC010284E78599581 // (-5;-4)
data8 0x0000051EB851EB85 // (-5;-4)
data8 0xC009260DBC9E59AF // (-4;-3)
data8 0x000028F5C28F5C29 // (-4;-3)
data8 0xC003A7FC9600F86C // (-3;-2)
data8 0x0000666666666666 // (-3;-2)
data8 0xCC15879606130890,0x4000 // PR21
data8 0xB42FE3281465E1CC,0x4000 // PR31
//
@ -971,19 +985,32 @@ data8 0x828185F0B95C9916,0x4001 // PR00
//
data8 0xD4D3C819E4E5654B,0x4000 // PR10
data8 0xA82FBBA4FCC75298,0x4000 // PR20
data8 0xC02DFFFFFFFFFE52,0x000000000000001C // (-15;-14)
data8 0xC02BFFFFFFFFE6C7,0x00000000000001A6 // (-14;-13)
data8 0xC029FFFFFFFE9EDC,0x0000000000002BFB // (-13;-12)
data8 0xC027FFFFFFEE1127,0x000000000001EEC8 // (-12;-11)
data8 0xC025FFFFFF28CDD4,0x00000000001E1095 // (-11;-10)
data8 0xC023FFFFF6C0D7C0,0x000000000101B2B3 // (-10;-9)
data8 0xC021FFFFA3884BD0,0x000000000D6BF94D // (-9;-8)
data8 0xC01FFFF97F8159CF,0x00000000C9539B89 // (-8;-7)
data8 0xC01BFFCBF76B86F0,0x00000007357E670E // (-7;-6)
data8 0xC017FE92F591F40D,0x000000346DC5D639 // (-6;-5)
data8 0xC013F7577A6EEAFD,0x00000147AE147AE1 // (-5;-4)
data8 0xC00FA471547C2FE5,0x00000C49BA5E353F // (-4;-3)
data8 0xC005FB410A1BD901,0x000053F7CED91687 // (-3;-2)
data8 0xC02DFFFFFFFFFE52 // (-15;-14)
data8 0x000000000000001C // (-15;-14)
data8 0xC02BFFFFFFFFE6C7 // (-14;-13)
data8 0x00000000000001A6 // (-14;-13)
data8 0xC029FFFFFFFE9EDC // (-13;-12)
data8 0x0000000000002BFB // (-13;-12)
data8 0xC027FFFFFFEE1127 // (-12;-11)
data8 0x000000000001EEC8 // (-12;-11)
data8 0xC025FFFFFF28CDD4 // (-11;-10)
data8 0x00000000001E1095 // (-11;-10)
data8 0xC023FFFFF6C0D7C0 // (-10;-9)
data8 0x000000000101B2B3 // (-10;-9)
data8 0xC021FFFFA3884BD0 // (-9;-8)
data8 0x000000000D6BF94D // (-9;-8)
data8 0xC01FFFF97F8159CF // (-8;-7)
data8 0x00000000C9539B89 // (-8;-7)
data8 0xC01BFFCBF76B86F0 // (-7;-6)
data8 0x00000007357E670E // (-7;-6)
data8 0xC017FE92F591F40D // (-6;-5)
data8 0x000000346DC5D639 // (-6;-5)
data8 0xC013F7577A6EEAFD // (-5;-4)
data8 0x00000147AE147AE1 // (-5;-4)
data8 0xC00FA471547C2FE5 // (-4;-3)
data8 0x00000C49BA5E353F // (-4;-3)
data8 0xC005FB410A1BD901 // (-3;-2)
data8 0x000053F7CED91687 // (-3;-2)
data8 0x80151BB918A293AA,0x4000 // PR30
data8 0xB3C9F8F47422A314,0x400B // PRN
//
@ -3538,6 +3565,7 @@ lgamma_libm_err:
};;
GLOBAL_LIBM_END(__libm_lgamma)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -47,6 +47,7 @@
// 09/16/02 Improved accuracy on intervals reduced to [1;1.25]
// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 07/22/03 Reformatted some data tables
//
//*********************************************************************
//
@ -685,19 +686,26 @@ data8 0x3FF1029A9DD542B4,0xBFFAD37C209D3B25 // A6,A5
data8 0x405385E6FD9BE7EA // A0
data8 0x478895F1C0000000 // Overflow boundary
data8 0x400062D97D26B523,0xC00A03E1529FF023 // A6,A5
data8 0x4069204C51E566CE,0 // A0
data8 0x4069204C51E566CE // A0
data8 0x0000000000000000 // pad
data8 0x40101476B38FD501,0xC0199DE7B387C0FC // A6,A5
data8 0x407EB8DAEC83D759,0 // A0
data8 0x407EB8DAEC83D759 // A0
data8 0x0000000000000000 // pad
data8 0x401FDB008D65125A,0xC0296B506E665581 // A6,A5
data8 0x409226D93107EF66,0 // A0
data8 0x409226D93107EF66 // A0
data8 0x0000000000000000 // pad
data8 0x402FB3EAAF3E7B2D,0xC039521142AD8E0D // A6,A5
data8 0x40A4EFA4F072792E,0 // A0
data8 0x40A4EFA4F072792E // A0
data8 0x0000000000000000 // pad
data8 0x403FA024C66B2563,0xC0494569F250E691 // A6,A5
data8 0x40B7B747C9235BB8,0 // A0
data8 0x40B7B747C9235BB8 // A0
data8 0x0000000000000000 // pad
data8 0x404F9607D6DA512C,0xC0593F0B2EDDB4BC // A6,A5
data8 0x40CA7E29C5F16DE2,0 // A0
data8 0x40CA7E29C5F16DE2 // A0
data8 0x0000000000000000 // pad
data8 0x405F90C5F613D98D,0xC0693BD130E50AAF // A6,A5
data8 0x40DD4495238B190C,0 // A0
data8 0x40DD4495238B190C // A0
data8 0x0000000000000000 // pad
//
// polynomial approximation of ln(sin(Pi*x)/(Pi*x)), |x| <= 0.5
data8 0xBFD58731A486E820,0xBFA4452CC28E15A9 // S16,S14
@ -2133,6 +2141,7 @@ lgammaf_libm_err:
};;
GLOBAL_LIBM_END(__libm_lgammaf)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -7622,6 +7622,7 @@ lgammal_singularity:
GLOBAL_LIBM_END(__libm_lgammal)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -44,38 +44,51 @@
// 02/06/02 Corrected to handle 32- or 64-bit integers
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 08/25/03 Improved performance
//
// API
//==============================================================
// float = __libm_scalblnf (float x, long int n, int long_int_type)
// float __libm_scalblnf (float x, long int n, int long_int_type)
// input floating point f8 and long int n (r33)
// input long_int_type = 0 if long int defined as 32 bits, = 1 if 64 bits
//
// output floating point f8
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
// Strategy:
// Compute biased exponent of result exp_Result = N + exp_X
// Break into ranges:
// exp_Result > 0x1007e -> Certain overflow
// exp_Result = 0x1007e -> Possible overflow
// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path)
// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
// exp_Result < 0x0ff81 - 23 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Result2 = f9
FR_Result3 = f11
FR_Norm_X = f12
FR_Two_N = f14
FR_Two_to_Big = f15
FR_Result3 = f10
FR_Norm_X = f11
FR_Two_N = f12
GR_neg_ov_limit= r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
GR_Scratch = r18
GR_Scratch1 = r19
GR_exp_Result = r18
GR_pos_ov_limit= r19
GR_Bias = r20
GR_N_as_int = r21
GR_signexp_X = r22
GR_exp_X = r23
GR_exp_mask = r24
GR_max_exp = r25
GR_min_exp = r26
GR_min_den_exp = r27
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@ -93,105 +106,142 @@ GLOBAL_LIBM_ENTRY(__libm_scalblnf)
// Build the exponent Bias
//
{ .mfi
alloc r32=ar.pfs,3,0,4,0
fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
addl GR_Bias = 0x0FFFF,r0
getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
mov GR_Bias = 0x0ffff
}
//
// Is N zero?
// Normalize x
// Do we need to sign extend input (long_int_type = 0)?
// Is long integer type 32 bits?
//
{ .mfi
cmp.eq.unc p6,p0 = r33,r0
mov GR_Big = 35000 // If N this big then certain overflow
fnorm.s1 FR_Norm_X = FR_Floating_X
cmp.eq.unc p8,p9 = r34,r0
cmp.eq p8,p9 = r34,r0
}
;;
{ .mii
(p9) mov GR_N_as_int = r33 // Get n directly if long int 64 bits
(p8) sxt4 GR_N_as_int = r33 // Sign extend n if long int 32 bits
nop.i 0
}
;;
//
// Normalize x
// Branch and return special values.
// Create -35000
// Create 35000
//
// Sign extend N if long int is 32 bits
{ .mfi
addl GR_Big = 35000,r0
nop.f 0
add GR_N_Biased = GR_Bias,GR_N_as_int
(p9) mov GR_N_as_int = r33 // Copy N if long int is 64 bits
fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
(p8) sxt4 GR_N_as_int = r33 // Sign extend N if long int is 32 bits
}
{ .mfb
addl GR_NBig = -35000,r0
(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
(p7) br.ret.spnt b0
};;
{ .mfi
mov GR_NBig = -35000 // If N this small then certain underflow
nop.f 0
mov GR_max_exp = 0x1007e // Exponent of maximum float
}
;;
//
// Build the exponent Bias
// Return x when N = 0
//
// Create biased exponent for 2**N
{ .mfi
add GR_N_Biased = GR_Bias,GR_N_as_int
nop.f 0
cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow?
}
{ .mib
cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
mov GR_min_exp = 0x0ff81 // Exponent of minimum float
(p9) br.cond.spnt SCALBNF_UNORM // Branch if x=unorm
}
;;
SCALBNF_COMMON:
// Main path continues. Also return here from x=unorm path.
// Create 2**N
.pred.rel "mutex",p7,p8
{ .mfi
setf.exp FR_Two_N = GR_N_Biased
nop.f 0
addl GR_Scratch1 = 0x063BF,r0
(p7) mov GR_N_as_int = GR_Big // Limit max N
}
{ .mfi
(p8) mov GR_N_as_int = GR_NBig // Limit min N
nop.f 0
(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big
}
;;
//
// Create biased exponent for 2**N for N big
// Is N zero?
//
{ .mfi
(p7) add GR_N_Biased = GR_Bias,GR_N_as_int
nop.f 0
cmp.eq.or p6,p0 = r33,r0
}
{ .mfi
mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
nop.f 0
mov GR_exp_mask = 0x1ffff // Exponent mask
}
;;
//
// Create 2**N for N big
// Return x when N = 0 or X = Nan, Inf, Zero
//
{ .mfi
(p7) setf.exp FR_Two_N = GR_N_Biased
nop.f 0
mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
}
{ .mfb
addl GR_Scratch = 0x019C3F,r0
and GR_exp_X = GR_exp_mask, GR_signexp_X
(p6) fma.s.s0 FR_Result = FR_Floating_X, f1, f0
(p6) br.ret.spnt b0
};;
}
;;
//
// Create 2*big
// Create 2**-big
// Is N > 35000
// Is N < -35000
// Raise Denormal operand flag with compare
// Main path, create 2**N
// Compute biased result exponent
//
{ .mfi
setf.exp FR_NBig = GR_Scratch1
nop.f 0
cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
}
{ .mfi
setf.exp FR_Big = GR_Scratch
add GR_exp_Result = GR_exp_X, GR_N_as_int
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
};;
mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
}
;;
//
// Adjust 2**N if N was very small or very large
// Do final operation
//
{ .mfi
nop.m 0
(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
nop.i 0
cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
{ .mlx
nop.m 999
movl GR_Scratch = 0x000000000003007F
};;
{ .mfi
{ .mfb
nop.m 0
(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
nop.i 0
nop.f 0
(p9) br.cond.spnt SCALBNF_UNDERFLOW // Branch if certain underflow
}
{ .mlx
nop.m 999
movl GR_Scratch1= 0x000000000001007F
};;
;;
{ .mib
(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
(p7) br.ret.sptk b0 // Return from main path
}
;;
{ .bbb
(p6) br.cond.spnt SCALBNF_OVERFLOW // Branch if certain overflow
(p8) br.cond.spnt SCALBNF_POSSIBLE_OVERFLOW // Branch if possible overflow
(p9) br.cond.spnt SCALBNF_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
;;
// Here if possible underflow.
// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
SCALBNF_POSSIBLE_UNDERFLOW:
//
// Here if possible overflow.
// Resulting exponent: 0x1007e = exp_Result
SCALBNF_POSSIBLE_OVERFLOW:
// Set up necessary status fields
//
@ -200,34 +250,31 @@ GLOBAL_LIBM_ENTRY(__libm_scalblnf)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
nop.m 999
nop.m 0
fsetc.s3 0x7F,0x41
nop.i 999
nop.i 0
}
{ .mfi
nop.m 999
nop.m 0
fsetc.s2 0x7F,0x42
nop.i 999
};;
nop.i 0
}
;;
//
// Do final operation
// Do final operation with s2 and s3
//
{ .mfi
setf.exp FR_NBig = GR_Scratch
fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
nop.i 999
setf.exp FR_NBig = GR_neg_ov_limit
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 0
}
{ .mfi
nop.m 999
fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
{ .mfi
setf.exp FR_Big = GR_Scratch1
setf.exp FR_Big = GR_pos_ov_limit
fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
nop.i 999
};;
nop.i 0
}
;;
// Check for overflow or underflow.
// Restore s3
@ -236,70 +283,91 @@ GLOBAL_LIBM_ENTRY(__libm_scalblnf)
{ .mfi
nop.m 0
fsetc.s3 0x7F,0x40
nop.i 999
nop.i 0
}
{ .mfi
nop.m 0
fsetc.s2 0x7F,0x40
nop.i 999
};;
nop.i 0
}
;;
//
// Is the result zero?
//
{ .mfi
nop.m 999
fclass.m.unc p6, p0 = FR_Result3, 0x007
nop.i 999
nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
nop.i 0
}
{ .mfi
addl GR_Tag = 205, r0
fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
nop.i 0
};;
}
;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
nop.m 999
nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
nop.i 999
};;
nop.i 0
}
;;
//
// Is the result bigger than the allowed range?
// Branch out for underflow
//
{ .mfb
(p6) addl GR_Tag = 206, r0
nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt scalbnf_UNDERFLOW
};;
(p6) br.cond.spnt SCALBNF_UNDERFLOW
}
;;
//
// Branch out for overflow
//
{ .mbb
nop.m 0
(p7) br.cond.spnt scalbnf_OVERFLOW
(p9) br.cond.spnt scalbnf_OVERFLOW
};;
//
// Return from main path.
//
{ .mfb
nop.m 999
nop.f 0
br.ret.sptk b0;;
{ .bbb
(p7) br.cond.spnt SCALBNF_OVERFLOW
(p9) br.cond.spnt SCALBNF_OVERFLOW
br.ret.sptk b0 // Return from main path.
}
;;
// Here if result overflows
SCALBNF_OVERFLOW:
{ .mib
alloc r32=ar.pfs,3,0,4,0
addl GR_Tag = 205, r0 // Set error tag for overflow
br.cond.sptk __libm_error_region // Call error support for overflow
}
;;
// Here if result underflows
SCALBNF_UNDERFLOW:
{ .mib
alloc r32=ar.pfs,3,0,4,0
addl GR_Tag = 206, r0 // Set error tag for underflow
br.cond.sptk __libm_error_region // Call error support for underflow
}
;;
// Here if x=unorm
SCALBNF_UNORM:
{ .mib
getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
nop.i 0
br.cond.sptk SCALBNF_COMMON // Return to main path
}
;;
GLOBAL_LIBM_END(__libm_scalblnf)
__libm_error_region:
scalbnf_OVERFLOW:
scalbnf_UNDERFLOW:
LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
@ -352,9 +420,9 @@ scalbnf_UNDERFLOW:
// Get location of result on stack
//
{ .mmi
nop.m 0
nop.m 0
add GR_Parameter_RESULT = 48,sp
nop.m 0
nop.i 0
};;
//

View File

@ -46,12 +46,13 @@
// 03/19/02 Added stack unwind around call to __libm_cis_large
// 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 08/08/03 Improved performance
// 02/11/04 cis is moved to the separate file.
//
// API
//==============================================================
// 1) double _Complex cis(double)
// 2) void sincos(double, double*s, double*c)
// 3) __libm_sincos - internal LIBM function, that accepts
// 1) void sincos(double, double*s, double*c)
// 2) __libm_sincos - internal LIBM function, that accepts
// argument in f8 and returns cosine through f8, sine through f9
//
// Overview of operation
@ -166,15 +167,14 @@
// Registers used
//==============================================================
// general input registers:
// r14 -> r19
// r32 -> r49
// r14 -> r39
// predicate registers used:
// p6 -> p14
//
// floating-point registers used
// f9 -> f15
// f32 -> f100
// f32 -> f67
// Assembly macros
//==============================================================
@ -246,38 +246,32 @@ cis_Q = f67
cis_pResSin = r33
cis_pResCos = r34
cis_exp_limit = r35
cis_r_signexp = r36
cis_AD_beta_table = r37
cis_r_sincos = r38
cis_r_exp = r39
cis_r_17_ones = r40
cis_GR_sig_inv_pi_by_16 = r14
cis_GR_rshf_2to61 = r15
cis_GR_rshf = r16
cis_GR_exp_2tom61 = r17
cis_GR_n = r18
cis_GR_n_sin = r19
cis_GR_m_sin = r41
cis_GR_32m_sin = r41
cis_exp_limit = r20
cis_r_signexp = r21
cis_AD_1 = r22
cis_r_sincos = r23
cis_r_exp = r24
cis_r_17_ones = r25
cis_GR_m_sin = r26
cis_GR_32m_sin = r26
cis_GR_n_cos = r27
cis_GR_m_cos = r28
cis_GR_32m_cos = r28
cis_AD_2_sin = r29
cis_AD_2_cos = r30
cis_gr_tmp = r31
cis_GR_n_cos = r42
cis_GR_m_cos = r43
cis_GR_32m_cos = r43
cis_AD_2_sin = r44
cis_AD_2_cos = r45
cis_gr_tmp = r46
GR_SAVE_B0 = r47
GR_SAVE_GP = r48
rB0_SAVED = r49
GR_SAVE_PFS = r50
GR_SAVE_PR = r51
cis_AD_1 = r52
GR_SAVE_B0 = r35
GR_SAVE_GP = r36
rB0_SAVED = r37
GR_SAVE_PFS = r38
GR_SAVE_PR = r39
RODATA
@ -408,7 +402,7 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4)
GLOBAL_IEEE754_ENTRY(sincos)
// cis_GR_sig_inv_pi_by_16 = significand of 16/pi
{ .mlx
alloc GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0
getf.exp cis_r_signexp = cis_Arg
movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
}
@ -430,12 +424,11 @@ GLOBAL_IEEE754_ENTRY(sincos)
br.cond.sptk _CIS_COMMON
};;
GLOBAL_IEEE754_END(sincos)
LOCAL_LIBM_ENTRY(cis)
LOCAL_LIBM_END(cis)
GLOBAL_LIBM_ENTRY(__libm_sincos)
// cis_GR_sig_inv_pi_by_16 = significand of 16/pi
{ .mlx
alloc GR_SAVE_PFS = ar.pfs,0,21,0,0
getf.exp cis_r_signexp = cis_Arg
movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
}
// cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
@ -443,6 +436,7 @@ GLOBAL_LIBM_ENTRY(__libm_sincos)
addl cis_AD_1 = @ltoff(double_cis_pi), gp
movl cis_GR_rshf_2to61 = 0x47b8000000000000
};;
// p14 set for __libm_sincos and cis
{ .mfi
ld8 cis_AD_1 = [cis_AD_1]
@ -476,10 +470,15 @@ _CIS_COMMON:
// 2^-61 for scaling Nfloat
// 0x1001a is register_bias + 27.
// So if f8 >= 2^27, go to large arguments routine
{ .mmi
getf.exp cis_r_signexp = cis_Arg
setf.exp cis_2TOM61 = cis_GR_exp_2tom61
{ .mfi
alloc GR_SAVE_PFS = ar.pfs, 3, 5, 0, 0
fclass.m p11,p0 = cis_Arg, 0x0b // Test for x=unorm
mov cis_exp_limit = 0x1001a
}
{ .mib
setf.exp cis_2TOM61 = cis_GR_exp_2tom61
nop.i 0
(p6) br.cond.spnt _CIS_SPECIAL_ARGS
};;
// Load the two pieces of pi/16
@ -488,9 +487,11 @@ _CIS_COMMON:
{ .mmb
ldfe cis_Pi_by_16_hi = [cis_AD_1],16
setf.d cis_RSHF = cis_GR_rshf
(p6) br.cond.spnt _CIS_SPECIAL_ARGS
(p11) br.cond.spnt _CIS_UNORM // Branch if x=unorm
};;
_CIS_COMMON2:
// Return here if x=unorm
// Create constant inexact set
{ .mmi
ldfe cis_Pi_by_16_lo = [cis_AD_1],16
@ -498,23 +499,18 @@ _CIS_COMMON:
nop.i 0
};;
// Select exponent (17 lsb)
{ .mfi
ldfe cis_Pi_by_16_lowest = [cis_AD_1],16
nop.f 0
nop.i 0
dep.z cis_r_exp = cis_r_signexp, 0, 17
};;
// Start loading P, Q coefficients
{ .mib
ldfpd cis_P4,cis_Q4 = [cis_AD_1],16
dep.z cis_r_exp = cis_r_signexp, 0, 17
nop.b 0
};;
// p10 is true if we must call routines to handle larger arguments
// p10 is true if f8 exp is >= 0x1001a (2^27)
{ .mmb
ldfpd cis_P3,cis_Q3 = [cis_AD_1],16
ldfpd cis_P4,cis_Q4 = [cis_AD_1],16
cmp.ge p10, p0 = cis_r_exp, cis_exp_limit
(p10) br.cond.spnt _CIS_LARGE_ARGS // go to |x| >= 2^27 path
};;
@ -523,39 +519,33 @@ _CIS_COMMON:
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
// rightmost bits of significand
{ .mfi
ldfpd cis_P2,cis_Q2 = [cis_AD_1],16
ldfpd cis_P3,cis_Q3 = [cis_AD_1],16
fma.s1 cis_W_2TO61_RSH = cis_NORM_f8,cis_SIG_INV_PI_BY_16_2TO61,cis_RSHF_2TO61
nop.i 0
};;
// get N = (int)cis_int_Nfloat
// cis_NFLOAT = Round_Int_Nearest(cis_W)
{ .mmf
getf.sig cis_GR_n = cis_W_2TO61_RSH
ldfpd cis_P2,cis_Q2 = [cis_AD_1],16
fms.s1 cis_NFLOAT = cis_W_2TO61_RSH,cis_2TOM61,cis_RSHF
};;
// cis_r = -cis_Nfloat * cis_Pi_by_16_hi + x
{ .mfi
ldfpd cis_P1,cis_Q1 = [cis_AD_1], 16
fms.s1 cis_NFLOAT = cis_W_2TO61_RSH,cis_2TOM61,cis_RSHF
nop.i 0
};;
// get N = (int)cis_int_Nfloat
{ .mfi
getf.sig cis_GR_n = cis_W_2TO61_RSH
nop.f 0
nop.i 0
};;
// Add 2^(k-1) (which is in cis_r_sincos) to N
// cis_r = -cis_Nfloat * cis_Pi_by_16_hi + x
// cis_r = cis_r -cis_Nfloat * cis_Pi_by_16_lo
{ .mfi
add cis_GR_n_cos = 0x8, cis_GR_n
fnma.s1 cis_r = cis_NFLOAT,cis_Pi_by_16_hi,cis_NORM_f8
nop.i 0
};;
//Get M (least k+1 bits of N)
// Add 2^(k-1) (which is in cis_r_sincos) to N
{ .mmi
add cis_GR_n_cos = 0x8, cis_GR_n
;;
//Get M (least k+1 bits of N)
and cis_GR_m_sin = 0x1f,cis_GR_n
and cis_GR_m_cos = 0x1f,cis_GR_n_cos
nop.i 0
};;
{ .mmi
@ -565,9 +555,10 @@ _CIS_COMMON:
};;
// Add 32*M to address of sin_cos_beta table
{ .mmi
// cis_r = cis_r -cis_Nfloat * cis_Pi_by_16_lo
{ .mfi
add cis_AD_2_sin = cis_GR_32m_sin, cis_AD_1
nop.m 0
fnma.s1 cis_r = cis_NFLOAT, cis_Pi_by_16_lo, cis_r
shl cis_GR_32m_cos = cis_GR_m_cos,5
};;
@ -580,7 +571,6 @@ _CIS_COMMON:
{ .mfi
ldfe cis_Sm_cos = [cis_AD_2_cos], 16
fnma.s1 cis_r = cis_NFLOAT, cis_Pi_by_16_lo, cis_r
nop.i 0
};;
@ -636,6 +626,12 @@ _CIS_COMMON:
nop.i 0
};;
{ .mfi
nop.m 0
fmpy.s1 cis_rcub = cis_r_exact, cis_rsq // get r^3
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 cis_Q = cis_rsq, cis_Q_temp2, cis_Q1
@ -647,12 +643,6 @@ _CIS_COMMON:
nop.i 0
};;
{ .mfi
nop.m 0
fmpy.s1 cis_rcub = cis_r_exact, cis_rsq // get r^3
nop.i 0
};;
{ .mfi
nop.m 0
fma.s1 cis_Q_sin = cis_srsq_sin,cis_Q, cis_Sm_sin
@ -717,7 +707,17 @@ _CIS_SPECIAL_ARGS:
stfd [cis_pResCos] = cis_Cos_res
br.ret.sptk b0 // common exit for sincos main path
};;
_CIS_UNORM:
// Here if x=unorm
{ .mfb
getf.exp cis_r_signexp = cis_NORM_f8 // Get signexp of x
fcmp.eq.s0 p11,p0 = cis_Arg, f0 // Dummy op to set denorm
br.cond.sptk _CIS_COMMON2 // Return to main path
};;
GLOBAL_LIBM_END(__libm_sincos)
//// |x| > 2^27 path ///////
.proc _CIS_LARGE_ARGS
_CIS_LARGE_ARGS:

View File

@ -792,6 +792,7 @@ GLOBAL_LIBM_END(__libm_sincos_large)
GLOBAL_LIBM_ENTRY(__libm_sin_large)
{ .mlx
@ -821,6 +822,7 @@ alloc GR_Table_Base = ar.pfs,0,12,2,0
}
GLOBAL_LIBM_END(__libm_sin_large)
GLOBAL_LIBM_ENTRY(__libm_cos_large)
{ .mlx
@ -2673,6 +2675,7 @@ SINCOS_SPECIAL:
}
GLOBAL_LIBM_END(__libm_cos_large)
// *******************************************************************
// *******************************************************************
// *******************************************************************

View File

@ -47,12 +47,12 @@
// 03/19/02 Added stack unwind around call to __libm_cisf_large
// 09/05/02 Work range is widened by reduction strengthen (2 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 02/11/04 cisf is moved to the separate file.
// API
//==============================================================
// 1) float _Complex cisf(float)
// 2) void sincosf(float, float*s, float*c)
// 3) __libm_sincosf - internal LIBM function, that accepts
// 1) void sincosf(float, float*s, float*c)
// 2) __libm_sincosf - internal LIBM function, that accepts
// argument in f8 and returns cosine through f8, sine through f9
//
@ -420,8 +420,7 @@ GLOBAL_IEEE754_ENTRY(sincosf)
br.cond.sptk _CISF_COMMON
};;
GLOBAL_IEEE754_END(sincosf)
LOCAL_LIBM_ENTRY(cisf)
LOCAL_LIBM_END(cisf)
GLOBAL_LIBM_ENTRY(__libm_sincosf)
{ .mlx
// cisf_GR_sig_inv_pi_by_16 = significand of 16/pi
@ -679,6 +678,7 @@ _CISF_RETURN:
br.ret.sptk b0 // exit for sincos
};;
GLOBAL_LIBM_END(__libm_sincosf)
//// |x| > 2^24 path ///////
.proc _CISF_LARGE_ARGS
_CISF_LARGE_ARGS:

View File

@ -1,7 +1,7 @@
.file "libm_sincosl.asm"
.file "libm_sincosl.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -43,6 +43,9 @@
// 05/13/02 Initial version of sincosl (based on libm's sinl and cosl)
// 02/10/03 Reordered header: .section, .global, .proc, .align;
// used data8 for long double table values
// 10/13/03 Corrected .file name
// 02/11/04 cisl is moved to the separate file.
// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader
//
//*********************************************************************
//
@ -50,9 +53,8 @@
//
// API's
//==============================================================
// 1) long double _Complex cisl(long double)
// 2) void sincosl(long double, long double*s, long double*c)
// 3) __libm_sincosl - internal LIBM function, that accepts
// 1) void sincosl(long double, long double*s, long double*c)
// 2) __libm_sincosl - internal LIBM function, that accepts
// argument in f8 and returns cosine through f8, sine through f9
//
//
@ -65,7 +67,7 @@
// f32-f121
//
// General Purpose Registers:
// r32-r47
// r32-r61
//
// Predicate Registers: p6-p15
//
@ -775,20 +777,6 @@ FR_Tmp = f94
sincos_pResSin = r34
sincos_pResCos = r35
GR_sig_inv_pi = r14
GR_rshf_2to64 = r15
GR_exp_2tom64 = r16
GR_rshf = r17
GR_ad_p = r18
GR_ad_d = r19
GR_ad_pp = r20
GR_ad_qq = r21
GR_ad_c = r22
GR_ad_s = r23
GR_ad_ce = r24
GR_ad_se = r25
GR_ad_m14 = r26
GR_ad_s1 = r27
GR_exp_m2_to_m3= r36
GR_N_Inc = r37
GR_Cis = r38
@ -803,6 +791,20 @@ GR_N_SignS = r45
GR_N_SignC = r46
GR_N_SinCos = r47
GR_sig_inv_pi = r48
GR_rshf_2to64 = r49
GR_exp_2tom64 = r50
GR_rshf = r51
GR_ad_p = r52
GR_ad_d = r53
GR_ad_pp = r54
GR_ad_qq = r55
GR_ad_c = r56
GR_ad_s = r57
GR_ad_ce = r58
GR_ad_se = r59
GR_ad_m14 = r60
GR_ad_s1 = r61
// For unwind support
GR_SAVE_B0 = r39
@ -814,7 +816,7 @@ GR_SAVE_PFS = r41
GLOBAL_IEEE754_ENTRY(sincosl)
{ .mlx ///////////////////////////// 1 /////////////////
alloc r32 = ar.pfs,3,13,2,0
alloc r32 = ar.pfs,3,27,2,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@ -834,11 +836,9 @@ GLOBAL_IEEE754_ENTRY(sincosl)
};;
GLOBAL_IEEE754_END(sincosl)
LOCAL_LIBM_ENTRY(cisl)
LOCAL_LIBM_END(cisl)
GLOBAL_LIBM_ENTRY(__libm_sincosl)
{ .mlx ///////////////////////////// 1 /////////////////
alloc r32 = ar.pfs,3,14,2,0
alloc r32 = ar.pfs,3,27,2,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@ -2447,6 +2447,7 @@ SINCOSL_SPECIAL:
GLOBAL_LIBM_END(__libm_sincosl)
// *******************************************************************
// *******************************************************************
// *******************************************************************
@ -2461,7 +2462,7 @@ GLOBAL_LIBM_END(__libm_sincosl)
// c is in f9
// N is in r8
// Be sure to allocate at least 2 GP registers as output registers for
// __libm_pi_by_2_reduce. This routine uses r49-50. These are used as
// __libm_pi_by_2_reduce. This routine uses r62-63. These are used as
// scratch registers within the __libm_pi_by_2_reduce routine (for speed).
//
// We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We

File diff suppressed because it is too large Load Diff

View File

@ -1134,3 +1134,4 @@ ASINH_UNORM:
;;
GLOBAL_LIBM_END(asinh)

View File

@ -1344,3 +1344,4 @@ near_0:
GLOBAL_LIBM_END(asinhl)

View File

@ -553,3 +553,4 @@ ATANF_X_INF_NAN_ZERO:
;;
GLOBAL_LIBM_END(atanf)

View File

@ -812,6 +812,7 @@ GLOBAL_IEEE754_ENTRY(atanl)
;;
GLOBAL_IEEE754_END(atanl)
GLOBAL_IEEE754_ENTRY(atan2l)
{ .mfi
@ -1951,6 +1952,7 @@ ATANL_ArgY_Not_INF:
;;
GLOBAL_IEEE754_END(atan2l)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi

View File

@ -762,3 +762,4 @@ GLOBAL_LIBM_END(cbrtf)

View File

@ -1,7 +1,7 @@
.file "cbrtl.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -42,6 +42,7 @@
// 04/28/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header:.section,.global,.proc,.align
// 11/23/04 Reformatted routine and improved speed
//
// API
//==============================================================
@ -53,49 +54,93 @@
//
// Implementation
//
// cbrt(a) = cbrt(a y) / cbrt(y)
// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
// The result is computed as
// cbrt(x)= cbrt(1 - (1 - x*y)) * (1/cbrt(y))
// where y = frcpa(x) = (-1)^sgn_y * 2^(3*k+j) * m_y,
// m_y in [1,2), j in {0,1,2}
//
// where y = frcpa(a).
// cbrt(1 - (1 - x*y)) is approximated by a degree-6 polynomial
// in r= 1 - x*y :
// P = 1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6
//
// * cbrt(1 - (1 - a y)) is approximated by a degree-6 polynomial
//
// 1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6
//
// in r = 1 - a y.
//
// * The values 1/cbrt(y) are stored as two tables of constants T_hi
// The values (1/cbrt(y)) are stored as two tables of constants T_hi
// (double-extended precision) and D (single precision) as follows:
//
// T_hi (1 + D)= 1/cbrt(y) to about 80 bits of accuracy
//
// The tables are only stored for three exponent values and are
// then multiplied by e/3 where e is the exponent of the input number.
// This computation is carried out in parallel with the polynomial
// evaluation:
// The tables are only stored for three exponent values (i.e.
// only for 2^j * m_y, where j in {0,1,2} and m_y covers the 256
// possible mantissas for an frcpa result); the index is formed
// by the 8 leading mantissa bits of x, which is the same index used
// by the hardware to get frcpa(x).
//
// T = 2^(e/3) * T_hi
// The table values are multiplied by 2^k where e is the exponent of
// the input number. This multiplication is carried out in parallel with
// the polynomial evaluation:
// T= 2^(k) * T_hi
//
//=======================================================================
//===============
// input = x
// C = frcpa(x)
// r = C * x - 1
//
// Special values
//==============================================================
// Registers used
//==============================================================
// f6-f15
// r2-r3, r23-r30
// p6, p7, p12
FR_R = f6
FR_C1 = f7
FR_C2 = f9
FR_C3 = f10
FR_C4 = f11
FR_C5 = f12
FR_C6 = f13
FR_XNORM = f14
FR_D = f15
FR_SPECIAL = f32
FR_RCP = f33
FR_R2 = f34
FR_P1 = f35
FR_P2 = f36
FR_P3 = f37
FR_P4 = f38
FR_P5 = f39
FR_R3 = f40
FR_T = f41
FR_TF = f42
FR_P = f43
FR_SGNEXP = f44
GR_ADDR = r2
GR_C_START = r2
GR_ARGSIG = r3
GR_NORMSIG = r15
GR_D_ADDR = r16
GR_D_START = r16
GR_INDEX2 = r17
GR_IX2 = r17
GR_NORMEXP = r18
GR_EXP5 = r19
GR_EXP3 = r20
GR_EXP6 = r20
GR_EXP17 = r21
GR_TMP1 = r21
GR_SGNMASK = r22
GR_T_INDEX = r23
GR_IX_T = r23
GR_IX_D = r24
GR_D_INDEX = r24
GR_TMP2 = r25
GR_TMP3 = r25
GR_TMP4 = r25
GR_EXP_RES = r26
GR_BIAS23 = r27
GR_EXPBIAS = r27
GR_EXP_MOD_3 = r28
GR_SIGN = r29
GR_EXPSIGNRES = r29
GR_REMTMP = r30
GR_NORMEXPSGN = r31
// Data tables
@ -116,7 +161,6 @@ LOCAL_OBJECT_END(poly_coeffs)
LOCAL_OBJECT_START(T_table)
data8 0x80155c748c374836, 0x8040404b0879f7f9
data8 0x806b5dce4b405c10, 0x8096b586974669b1
data8 0x80bcd273d952a028, 0x80e898c52813f2f3
@ -504,10 +548,6 @@ data8 0xffc01fed60f86fb5, 0xffeaae3832b63956
LOCAL_OBJECT_END(T_table)
LOCAL_OBJECT_START(D_table)
data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c
@ -709,184 +749,238 @@ LOCAL_OBJECT_END(D_table)
GLOBAL_LIBM_ENTRY(cbrtl)
{ .mfi
getf.sig r3=f8
// will continue only for normal/denormal numbers
getf.sig GR_ARGSIG = f8
// will continue on main path only for normal/denormal numbers
// all other values will be filtered out and will exit early
fclass.nm.unc p12, p7 = f8, 0x1b
// r2 = pointer to C_1...C_6 followed by T_table
addl r2 = @ltoff(poly_coeffs), gp;;
// GR_ADDR = pointer to C_1...C_6 followed by T_table
addl GR_ADDR = @ltoff(poly_coeffs), gp
}
{ .mfi
// r29=2/3*bias -63=0xaaaa-0x3f=0xaa6b
mov r29=0xaa6b
// GR_BIAS23 = 2/3*bias -63 = 0xaaaa-0x3f = 0xaa6b
mov GR_BIAS23 = 0xaa6b
// normalize a
fma.s1 f14=f8,f1,f0
// r27 = pointer to D table
addl r27 = @ltoff(D_table), gp;;
}
{.mib
nop.m 0
(p7) cmp.eq p12,p0=r3,r0
nop.b 0;;
}
{.mfb
// load start address for C_1...C_6 followed by T_table
ld8 r2=[r2]
(p12) fma.s0 f8=f8,f1,f0
(p12) br.ret.spnt b0;;
fma.s1 FR_XNORM = f8, f1, f0
// GR_D_ADDR = pointer to D table
addl GR_D_ADDR = @ltoff(D_table), gp
}
;;
{ .mmf
// load C_1
ldfe f7=[r2],16
// load start address for C_1...C_6 followed by T_table
ld8 GR_C_START = [ GR_ADDR ]
// load start address of D table
ld8 r27=[r27]
ld8 GR_D_START = [ GR_D_ADDR ]
// y = frcpa(a)
frcpa.s0 f8,p6=f1,f8;;
}
{.mmi
// load C_2
ldfe f9=[r2],16;;
// load C_3, C_4
ldfpd f10,f11=[r2],16
nop.i 0;;
frcpa.s1 FR_RCP, p6 = f1, f8
}
;;
{ .mmi
// get normalized significand
getf.sig r23=f14
getf.sig GR_NORMSIG = FR_XNORM
// get exponent
getf.exp r24=f14
mov r25=0x20000;;
getf.exp GR_NORMEXPSGN = FR_XNORM
(p7) cmp.eq p12, p0 = GR_ARGSIG, r0
}
;;
{ .mii
// get r26=sign
and r26=r24,r25
// eliminate leading 1 from r23=2nd table index
shl r23=r23,1
// eliminate sign from exponent (r25)
andcm r25=r24,r25;;
// load C_1
ldfe FR_C1 = [ GR_C_START ], 16
mov GR_SGNMASK = 0x20000
nop.i 0
}
;;
{ .mfb
// load C_2
ldfe FR_C2 = [ GR_C_START ], 16
(p12) fma.s0 f8 = f8, f1, f0
// NaN/Infinities exit early
(p12) br.ret.spnt b0
}
;;
{ .mfi
// load C_3, C_4
ldfpd FR_C3, FR_C4 = [ GR_C_START ], 16
// y = frcpa(a), set flags and result when argument is 0
// only used when p6=0
frcpa.s0 f8, p0 = f1, f8
nop.i 0
}
;;
{ .mii
// get GR_SIGN = sign
and GR_SIGN = GR_NORMEXPSGN, GR_SGNMASK
// eliminate leading 1 from GR_NORMSIG = 2nd table index
shl GR_INDEX2 = GR_NORMSIG, 1
// eliminate sign from exponent
andcm GR_NORMEXP = GR_NORMEXPSGN, GR_SGNMASK
}
;;
{ .mfi
// load C_5, C_6
(p6) ldfpd f12,f13=[r2],16
(p6) ldfpd FR_C5, FR_C6 = [ GR_C_START ], 16
// r = 1-a*y
(p6) fnma.s1 f6=f8,f14,f1
// 1: exponent*=5; // (2^{16}-1)/3=0x5555
shladd r24=r25,2,r25;;
(p6) fnma.s1 FR_R = FR_RCP, FR_XNORM, f1
// Start computation of floor(exponent/3) by
// computing (2^20+2)/3*exponent = exponent*0x55556
// 1: exponent* = 5;
// (2^{16}-1)/3 = 0x5555:
// will form 0x5555*exponent by using shladd's
shladd GR_EXP5 = GR_NORMEXP, 2, GR_NORMEXP
}
;;
{ .mib
// r30=(5*expon)*16
shladd r30=r24,4,r0
// r28=3*exponent
shladd r28=r25,1,r25
nop.b 0;;
// Next several integer steps compute floor(exponent/3)
// GR_TMP1 = (5*expon)*16
shladd GR_TMP1 = GR_EXP5, 4, r0
// GR_EXP3 = 3*exponent
shladd GR_EXP3 = GR_NORMEXP, 1, GR_NORMEXP
nop.b 0
}
;;
{ .mmi
// r28=6*exponent
shladd r28=r28,1,r0
// r24=17*expon
add r24=r24,r30
// r23=2nd table index (8 bits)
shr.u r23=r23,56;;
// GR_EXP6 = 6*exponent
shladd GR_EXP6 = GR_EXP3, 1, r0
// GR_EXP17 = 17*expon
add GR_EXP17 = GR_EXP5, GR_TMP1
// GR_IX2 = 2nd table index (8 bits)
shr.u GR_IX2 = GR_INDEX2, 56
}
;;
{ .mmi
// adjust T_table pointer by 2nd index
shladd r2=r23,3,r2
shladd GR_T_INDEX = GR_IX2, 3, GR_C_START
// adjust D_table pointer by 2nd index
shladd r27=r23,2,r27
// r30=(17*expon)*16^2
shl r30=r24,8;;
shladd GR_D_INDEX = GR_IX2, 2, GR_D_START
// GR_TMP2 = (17*expon)*16^2
shl GR_TMP2 = GR_EXP17, 8
}
;;
{ .mmi
// r24=expon*(2^16-1)/3
add r24=r24,r30;;
// r24=expon*(2^20+2)/3=expon*0x55556
shladd r24=r24,4,r28
nop.i 0;;
// GR_TMP3 = expon*(2^16-1)/3
add GR_TMP3 = GR_EXP17, GR_TMP2
;;
// GR_TMP4 = expon*(2^20+2)/3 = expon*0x55556
shladd GR_TMP4 = GR_TMP3, 4, GR_EXP6
nop.i 0
}
;;
{ .mii
nop.m 0
// r24=floor(expon/3)
shr.u r24=r24,20
nop.i 0;;
// GR_EXP_RES = floor(expon/3)
shr.u GR_EXP_RES = GR_TMP4, 20
nop.i 0
}
;;
{ .mmi
nop.m 0
// r28=3*exponent
shladd r28=r24,1,r24
// r16 = 3*exponent
shladd r16 = GR_EXP_RES, 1, GR_EXP_RES
// bias exponent
add r24=r29,r24;;
add GR_EXPBIAS = GR_BIAS23, GR_EXP_RES
}
;;
{ .mmi
// get remainder of exponent/3
sub r25=r25,r28;;
sub GR_EXP_MOD_3 = GR_NORMEXP, r16
;;
// add sign to exponent
or r24=r24,r26
or GR_EXPSIGNRES = GR_EXPBIAS, GR_SIGN
// remainder << = 8
shl r25=r25,8;;
shl GR_REMTMP = GR_EXP_MOD_3, 8
}
;;
{ .mfi
// adjust D_table pointer by 1st index
shladd r27=r25,2,r27
shladd GR_IX_D = GR_REMTMP, 2, GR_D_INDEX
// P_1 = C_1+C_2*r
(p6) fma.s1 f7=f9,f6,f7
(p6) fma.s1 FR_P1 = FR_C2, FR_R, FR_C1
// adjust T_table pointer by 1st index
shladd r2=r25,3,r2
shladd GR_IX_T = GR_REMTMP, 3, GR_T_INDEX
}
{ .mfi
// f14=sign*2^{exponent/3}
(p6) setf.exp f14=r24
// r2=r*r
(p6) fma.s1 f9=f6,f6,f0
nop.i 0;;
// FR_SGNEXP = sign*2^{exponent/3}
(p6) setf.exp FR_SGNEXP = GR_EXPSIGNRES
// r^2 = r*r
(p6) fma.s1 FR_R2 = FR_R, FR_R, f0
nop.i 0
}
;;
{ .mfi
// load D
(p6) ldfs f15=[r27]
(p6) ldfs FR_D = [ GR_IX_D ]
// P_2 = C_3+C_4*r
(p6) fma.s1 f10=f11,f6,f10
(p6) fma.s1 FR_P2 = FR_C4, FR_R, FR_C3
nop.i 0
}
{ .mfi
// load T
(p6) ldf8 f8=[r2]
(p6) ldf8 FR_T = [ GR_IX_T ]
// P_3 = C_5+C_6*r
(p6) fma.s1 f12=f13,f6,f12
nop.i 0;;
(p6) fma.s1 FR_P3 = FR_C6, FR_R, FR_C5
nop.i 0
}
;;
{ .mfi
nop.m 0
// P_4 = D-r*P_1
(p6) fnma.s1 f15=f6,f7,f15
(p6) fnma.s1 FR_P4 = FR_R, FR_P1, FR_D
nop.i 0
}
{ .mfi
nop.m 0
// r3=r*r2
(p6) fma.s1 f6=f6,f9,f0
nop.i 0;;
// r^3 = r*r^2
(p6) fma.s1 FR_R3 = FR_R, FR_R2, f0
nop.i 0
}
;;
{ .mfi
nop.m 0
// P_5 = P_2+r2*P_3
(p6) fma.s1 f10=f9,f12,f10
nop.i 0;;
(p6) fma.s1 FR_P5 = FR_R2, FR_P3, FR_P2
nop.i 0
}
;;
{ .mfi
nop.m 0
// T = T*(sign*2^{exponent/3})
(p6) fma.s1 f8=f8,f14,f0
(p6) fma.s1 FR_TF = FR_T, FR_SGNEXP, f0
nop.i 0
}
{ .mfi
nop.m 0
// P = P_4-r3*P_5
(p6) fnma.s1 f6=f6,f10,f15
nop.i 0;;
(p6) fnma.s1 FR_P = FR_R3, FR_P5, FR_P4
nop.i 0
}
;;
{ .mfb
nop.m 0
// result = T+T*p
(p6) fma.s0 f8=f8,f6,f8
br.ret.sptk b0;;
(p6) fma.s0 f8 = FR_TF, FR_P, FR_TF
br.ret.sptk b0
}
;;
GLOBAL_LIBM_END(cbrtl)

View File

@ -1,7 +1,7 @@
.file "sincos.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -51,6 +51,8 @@
// 06/03/02 Insure inexact flag set for large arg result
// 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 08/08/03 Improved performance
// 10/28/04 Saved sincos_r_sincos to avoid clobber by dynamic loader
// API
//==============================================================
@ -170,11 +172,11 @@
// Registers used
//==============================================================
// general input registers:
// r14 -> r19
// r32 -> r45
// r14 -> r26
// r32 -> r35
// predicate registers used:
// p6 -> p14
// p6 -> p11
// floating-point registers used
// f9 -> f15
@ -236,16 +238,6 @@ fp_tmp = f61
/////////////////////////////////////////////////////////////
sincos_AD_1 = r33
sincos_AD_2 = r34
sincos_exp_limit = r35
sincos_r_signexp = r36
sincos_AD_beta_table = r37
sincos_r_sincos = r38
sincos_r_exp = r39
sincos_r_17_ones = r40
sincos_GR_sig_inv_pi_by_16 = r14
sincos_GR_rshf_2to61 = r15
sincos_GR_rshf = r16
@ -254,11 +246,18 @@ sincos_GR_n = r18
sincos_GR_m = r19
sincos_GR_32m = r19
sincos_GR_all_ones = r19
sincos_AD_1 = r20
sincos_AD_2 = r21
sincos_exp_limit = r22
sincos_r_signexp = r23
sincos_r_17_ones = r24
sincos_r_sincos = r25
sincos_r_exp = r26
gr_tmp = r41
GR_SAVE_PFS = r41
GR_SAVE_B0 = r42
GR_SAVE_GP = r43
GR_SAVE_PFS = r33
GR_SAVE_B0 = r34
GR_SAVE_GP = r35
GR_SAVE_r_sincos = r36
RODATA
@ -405,7 +404,7 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4)
GLOBAL_IEEE754_ENTRY(sin)
{ .mlx
alloc r32 = ar.pfs, 1, 13, 0, 0
getf.exp sincos_r_signexp = f8
movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi
}
{ .mlx
@ -427,10 +426,11 @@ GLOBAL_IEEE754_ENTRY(sin)
;;
GLOBAL_IEEE754_END(sin)
GLOBAL_IEEE754_ENTRY(cos)
{ .mlx
alloc r32 = ar.pfs, 1, 13, 0, 0
getf.exp sincos_r_signexp = f8
movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi
}
{ .mlx
@ -464,7 +464,6 @@ _SINCOS_COMMON:
// Form two constants we need
// 16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
// 1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
// fcmp used to set denormal, and invalid on snans
{ .mfi
setf.sig sincos_SIG_INV_PI_BY_16_2TO61 = sincos_GR_sig_inv_pi_by_16
fclass.m p6,p0 = f8, 0xe7 // if x = 0,inf,nan
@ -480,10 +479,15 @@ _SINCOS_COMMON:
// 2^-61 for scaling Nfloat
// 0x1001a is register_bias + 27.
// So if f8 >= 2^27, go to large argument routines
{ .mmi
getf.exp sincos_r_signexp = f8
{ .mfi
alloc r32 = ar.pfs, 1, 4, 0, 0
fclass.m p11,p0 = f8, 0x0b // Test for x=unorm
mov sincos_GR_all_ones = -1 // Used to create the "inexact" constant
}
{ .mib
setf.exp sincos_2TOM61 = sincos_GR_exp_2tom61
addl gr_tmp = -1,r0 // Used to create the "inexact" constant
nop.i 999
(p6) br.cond.spnt _SINCOS_SPECIAL_ARGS
}
;;
@ -493,41 +497,31 @@ _SINCOS_COMMON:
{ .mmb
ldfe sincos_Pi_by_16_1 = [sincos_AD_1],16
setf.d sincos_RSHF = sincos_GR_rshf
(p6) br.cond.spnt _SINCOS_SPECIAL_ARGS
(p11) br.cond.spnt _SINCOS_UNORM // Branch if x=unorm
}
;;
_SINCOS_COMMON2:
// Return here if x=unorm
// Create constant used to set inexact
{ .mmi
ldfe sincos_Pi_by_16_2 = [sincos_AD_1],16
setf.sig fp_tmp = gr_tmp // constant for inexact set
nop.i 999
};;
{ .mfi
ldfe sincos_Pi_by_16_3 = [sincos_AD_1],16
nop.f 999
nop.i 999
};;
// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
{ .mmi
ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16
nop.m 999
setf.sig fp_tmp = sincos_GR_all_ones
nop.i 999
};;
// Select exponent (17 lsb)
{ .mmi
ldfpd sincos_P3,sincos_Q3 = [sincos_AD_1],16
nop.m 999
{ .mfi
ldfe sincos_Pi_by_16_3 = [sincos_AD_1],16
nop.f 999
dep.z sincos_r_exp = sincos_r_signexp, 0, 17
}
;;
};;
// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
// p10 is true if we must call routines to handle larger arguments
// p10 is true if f8 exp is >= 0x1001a (2^27)
{ .mmb
ldfpd sincos_P2,sincos_Q2 = [sincos_AD_1],16
ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16
cmp.ge p10,p0 = sincos_r_exp,sincos_exp_limit
(p10) br.cond.spnt _SINCOS_LARGE_ARGS // Go to "large args" routine
};;
@ -536,66 +530,61 @@ _SINCOS_COMMON:
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
// rightmost bits of significand
{ .mfi
ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16
ldfpd sincos_P3,sincos_Q3 = [sincos_AD_1],16
fma.s1 sincos_W_2TO61_RSH = sincos_NORM_f8,sincos_SIG_INV_PI_BY_16_2TO61,sincos_RSHF_2TO61
nop.i 999
};;
// get N = (int)sincos_int_Nfloat
// sincos_NFLOAT = Round_Int_Nearest(sincos_W)
// This is done by scaling back by 2^-61 and subtracting the shift constant
{ .mfi
nop.m 999
fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
nop.i 999
};;
// get N = (int)sincos_int_Nfloat
{ .mfi
{ .mmf
getf.sig sincos_GR_n = sincos_W_2TO61_RSH
nop.f 999
nop.i 999
ldfpd sincos_P2,sincos_Q2 = [sincos_AD_1],16
fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
};;
// Add 2^(k-1) (which is in sincos_r_sincos) to N
// sincos_r = -sincos_Nfloat * sincos_Pi_by_16_1 + x
{ .mfi
add sincos_GR_n = sincos_GR_n, sincos_r_sincos
ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16
fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8
nop.i 999
};;
// Get M (least k+1 bits of N)
// Add 2^(k-1) (which is in sincos_r_sincos) to N
{ .mmi
and sincos_GR_m = 0x1f,sincos_GR_n;;
add sincos_GR_n = sincos_GR_n, sincos_r_sincos
;;
// Get M (least k+1 bits of N)
and sincos_GR_m = 0x1f,sincos_GR_n
nop.i 999
};;
// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
{ .mfi
nop.m 999
fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2, sincos_r
shl sincos_GR_32m = sincos_GR_m,5
};;
// Add 32*M to address of sin_cos_beta table
// For sin denorm. - set uflow
{ .mfi
add sincos_AD_2 = sincos_GR_32m, sincos_AD_1
(p8) fclass.m.unc p10,p0 = f8,0x0b // For sin denorm. - set uflow
(p8) fclass.m.unc p10,p0 = f8,0x0b
nop.i 999
};;
// Load Sin and Cos table value using obtained index m (sincos_AD_2)
{ .mfi
ldfe sincos_Sm = [sincos_AD_2],16
(p9) fclass.m.unc p11,p0 = f8,0x0b // For cos denorm - set denorm
nop.i 999
};;
// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
{ .mfi
ldfe sincos_Cm = [sincos_AD_2]
fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2, sincos_r
nop.f 999
nop.i 999
};;
// get rsq = r*r
{ .mfi
nop.m 999
ldfe sincos_Cm = [sincos_AD_2]
fma.s1 sincos_rsq = sincos_r, sincos_r, f0 // r^2 = r*r
nop.i 999
}
@ -660,7 +649,6 @@ _SINCOS_COMMON:
fma.s1 sincos_Q = sincos_rsq, sincos_Q_temp2, sincos_Q1
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 sincos_P = sincos_rsq, sincos_P_temp2, sincos_P1
@ -675,7 +663,6 @@ _SINCOS_COMMON:
fma.s1 sincos_Q = sincos_srsq,sincos_Q, sincos_Sm
nop.i 999
}
{ .mfi
nop.m 999
fma.s1 sincos_P = sincos_rcub,sincos_P, sincos_r_exact
@ -683,19 +670,12 @@ _SINCOS_COMMON:
};;
// If sin(denormal), force underflow to be set
.pred.rel "mutex",p10,p11
{ .mfi
nop.m 999
(p10) fmpy.d.s0 fp_tmp = f8,f8 // forces underflow flag
nop.i 999 // for denormal sine args
}
{ .mfi
nop.m 999
(p11) fma.d.s0 fp_tmp = f8,f1, f8 // forces denormal flag
nop.i 999 // for denormal cosine args
(p10) fmpy.d.s0 fp_tmp = sincos_NORM_f8,sincos_NORM_f8
nop.i 999
};;
// Final calculation
// result = C[m]*P + Q
{ .mfb
@ -724,13 +704,22 @@ _SINCOS_SPECIAL_ARGS:
br.ret.sptk b0 // Exit for x = 0/Inf/NaN path
};;
_SINCOS_UNORM:
// Here if x=unorm
// Side path for an unnormal argument: refresh the sign/exponent word, raise
// the denormal flag, then rejoin the main flow at _SINCOS_COMMON2.
// NOTE(review): presumably sincos_NORM_f8 already holds the normalized copy
// of x when this path is entered -- it is computed outside this hunk; confirm.
{ .mfb
// Read sign+exponent from the normalized value rather than from f8 itself.
getf.exp sincos_r_signexp = sincos_NORM_f8 // Get signexp of x
// The compare result written to p11 is not the point; the op is issued on
// status field s0 only for its flag side effect.
fcmp.eq.s0 p11,p0 = f8, f0 // Dummy op to set denorm flag
br.cond.sptk _SINCOS_COMMON2 // Return to main path
};;
GLOBAL_IEEE754_END(cos)
//////////// x >= 2^27 - large arguments routine call ////////////
LOCAL_LIBM_ENTRY(__libm_callout_sincos)
_SINCOS_LARGE_ARGS:
.prologue
{ .mfi
mov sincos_GR_all_ones = -1 // 0xffffffff
mov GR_SAVE_r_sincos = sincos_r_sincos // Save sin or cos
nop.f 999
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS = ar.pfs
@ -753,7 +742,7 @@ _SINCOS_LARGE_ARGS:
};;
{ .mbb
cmp.ne p9,p0 = sincos_r_sincos, r0 // set p9 if cos
cmp.ne p9,p0 = GR_SAVE_r_sincos, r0 // set p9 if cos
nop.b 999
(p9) br.call.sptk.many b0 = __libm_cos_large# // cos(large_X)
};;

View File

@ -408,6 +408,7 @@ GLOBAL_IEEE754_ENTRY(sinf)
};;
GLOBAL_IEEE754_END(sinf)
GLOBAL_IEEE754_ENTRY(cosf)
{ .mlx
@ -657,6 +658,7 @@ _SINCOSF_SPECIAL_ARGS:
};;
GLOBAL_IEEE754_END(cosf)
//////////// x >= 2^24 - large arguments routine call ////////////
LOCAL_LIBM_ENTRY(__libm_callout_sincosf)
_SINCOSF_LARGE_ARGS:

View File

@ -1,7 +1,7 @@
.file "sincosl.s"
// Copyright (c) 2000 - 2003, Intel Corporation
// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -47,6 +47,8 @@
// 05/13/02 Changed interface to __libm_pi_by_2_reduce
// 02/10/03 Reordered header: .section, .global, .proc, .align;
// used data8 for long double table values
// 10/13/03 Corrected final .endp name to match .proc
// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader
//
//*********************************************************************
//
@ -63,8 +65,7 @@
// f32-f99
//
// General Purpose Registers:
// r32-r43
// r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
// r32-r58
//
// Predicate Registers: p6-p13
//
@ -715,20 +716,6 @@ FR_PP_1_lo = f98
FR_ArgPrime = f99
FR_inexact = f100
GR_sig_inv_pi = r14
GR_rshf_2to64 = r15
GR_exp_2tom64 = r16
GR_rshf = r17
GR_ad_p = r18
GR_ad_d = r19
GR_ad_pp = r20
GR_ad_qq = r21
GR_ad_c = r22
GR_ad_s = r23
GR_ad_ce = r24
GR_ad_se = r25
GR_ad_m14 = r26
GR_ad_s1 = r27
GR_exp_m2_to_m3= r36
GR_N_Inc = r37
GR_Sin_or_Cos = r38
@ -739,6 +726,21 @@ GR_exp_2_to_63 = r42
GR_exp_2_to_m3 = r43
GR_exp_2_to_24 = r44
GR_sig_inv_pi = r45
GR_rshf_2to64 = r46
GR_exp_2tom64 = r47
GR_rshf = r48
GR_ad_p = r49
GR_ad_d = r50
GR_ad_pp = r51
GR_ad_qq = r52
GR_ad_c = r53
GR_ad_s = r54
GR_ad_ce = r55
GR_ad_se = r56
GR_ad_m14 = r57
GR_ad_s1 = r58
// Added for unwind support
GR_SAVE_B0 = r39
@ -750,7 +752,7 @@ GR_SAVE_PFS = r41
GLOBAL_IEEE754_ENTRY(sinl)
{ .mlx
alloc r32 = ar.pfs,0,12,2,0
alloc r32 = ar.pfs,0,27,2,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@ -772,9 +774,10 @@ GLOBAL_IEEE754_ENTRY(sinl)
;;
GLOBAL_IEEE754_END(sinl)
GLOBAL_IEEE754_ENTRY(cosl)
{ .mlx
alloc r32 = ar.pfs,0,12,2,0
alloc r32 = ar.pfs,0,27,2,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@ -2285,6 +2288,7 @@ SINCOSL_SPECIAL:
}
GLOBAL_IEEE754_END(cosl)
// *******************************************************************
// *******************************************************************
// *******************************************************************
@ -2299,7 +2303,7 @@ GLOBAL_IEEE754_END(cosl)
// c is in f9
// N is in r8
// Be sure to allocate at least 2 GP registers as output registers for
// __libm_pi_by_2_reduce. This routine uses r49-50. These are used as
// __libm_pi_by_2_reduce. This routine uses r59-60. These are used as
// scratch registers within the __libm_pi_by_2_reduce routine (for speed).
//
// We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We
@ -2356,6 +2360,6 @@ SINCOSL_ARG_TOO_LARGE:
br.cond.sptk SINCOSL_NORMAL_R // Branch if |r|>=2^-3 for |x| >= 2^63
};;
.endp
LOCAL_LIBM_END(__libm_callout)
.type __libm_pi_by_2_reduce#,@function
.global __libm_pi_by_2_reduce#

View File

@ -922,3 +922,4 @@ erf_denormal:
GLOBAL_LIBM_END(erf)

View File

@ -1135,6 +1135,7 @@ GLOBAL_LIBM_ENTRY(erfc)
};;
GLOBAL_LIBM_END(erfc)
// call via (p15) br.cond.spnt __libm_error_region
// for x > ARG_ASYMP = 28.0
// or

Some files were not shown because too many files have changed in this diff Show More