* sysdeps/unix/sysv/linux/x86_64/getcontext.S: Use functionally equivalent, but shorter instructions.
* sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
* sysdeps/unix/sysv/linux/x86_64/setcontext.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/swapcontext.S: Likewise.
* sysdeps/unix/x86_64/sysdep.S: Likewise.
* sysdeps/x86_64/strchr.S: Likewise.
* sysdeps/x86_64/memset.S: Likewise.
* sysdeps/x86_64/strcspn.S: Likewise.
* sysdeps/x86_64/strcmp.S: Likewise.
* sysdeps/x86_64/elf/start.S: Likewise.
* sysdeps/x86_64/strspn.S: Likewise.
* sysdeps/x86_64/dl-machine.h: Likewise.
* sysdeps/x86_64/bsd-_setjmp.S: Likewise.
* sysdeps/x86_64/bsd-setjmp.S: Likewise.
* sysdeps/x86_64/strtok.S: Likewise.
2025-07-29 11:41:21 +03:00 · 2005-03-31 10:02:53 +00:00
parent 4d6302cf51
commit ee6189855a
174 changed files with 8152 additions and 6793 deletions
--- a/20
+++ b/20
@ -1,3 +1,23 @@
+2005-03-31  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/x86_64/getcontext.S: Use functionally
+	equivalent, but shorter instructions.
+	* sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/setcontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/swapcontext.S: Likewise.
+	* sysdeps/unix/x86_64/sysdep.S: Likewise.
+	* sysdeps/x86_64/strchr.S: Likewise.
+	* sysdeps/x86_64/memset.S: Likewise.
+	* sysdeps/x86_64/strcspn.S: Likewise.
+	* sysdeps/x86_64/strcmp.S: Likewise.
+	* sysdeps/x86_64/elf/start.S: Likewise.
+	* sysdeps/x86_64/strspn.S: Likewise.
+	* sysdeps/x86_64/dl-machine.h: Likewise.
+	* sysdeps/x86_64/bsd-_setjmp.S: Likewise.
+	* sysdeps/x86_64/bsd-setjmp.S: Likewise.
+	* sysdeps/x86_64/strtok.S: Likewise.
+
 2005-03-30  H.J. Lu  <hongjiu.lu@intel.com>

 	* sysdeps/ia64/fpu/e_acosf.S: Update from Intel libm 2005-03-21.
--- a/linuxthreads/ChangeLog
+++ b/linuxthreads/ChangeLog
@ -1,3 +1,8 @@
+2005-03-31  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h: Use
+	functionally equivalent, but shorter instructions.
+
 2005-03-28  Daniel Jacobowitz  <dan@codesourcery.com>

 	* sysdeps/mips/tls.h: New file.
--- a/linuxthreads/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
+++ b/linuxthreads/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.

@ -45,7 +45,7 @@
    POPARGS_##args							      \
    /* The return value from CENABLE is argument for CDISABLE.  */	      \
    movq %rax, (%rsp);							      \
-    movq $SYS_ify (syscall_name), %rax;					      \
+    movl $SYS_ify (syscall_name), %eax;					      \
    syscall;								      \
    movq (%rsp), %rdi;							      \
    /* Save %rax since it's the error code from the syscall.  */	      \
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@ -1,3 +1,25 @@
+2005-03-31  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S: Use
+	functionally equivalent, but shorter instructions.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S:
+	Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_once.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S:
+	Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/lowlevellock.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/sem_post.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S: Likewise.
+
 2005-03-28  Daniel Jacobowitz  <dan@codesourcery.com>

 	* sysdeps/mips/Makefile: New file.
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -48,12 +48,16 @@ __lll_mutex_lock_wait:

 	xorq	%r10, %r10	/* No timeout.  */
 	movl	$2, %edx
-	movq	%r10, %rsi	/* movq $FUTEX_WAIT, %rsi */
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif

 	cmpl	%edx, %eax	/* NB:	 %edx == 2 */
 	jne	2f

-1:	movq	$SYS_futex, %rax
+1:	movl	$SYS_futex, %eax
 	syscall

 2:	movl	%edx, %eax
@ -93,7 +97,7 @@ __lll_mutex_timedlock_wait:
 1:
 	/* Get current time.  */
 	movq	%rsp, %rdi
-	xorq	%rsi, %rsi
+	xorl	%esi, %esi
 	movq	$VSYSCALL_ADDR_vgettimeofday, %rax
 	/* This is a regular function call, all caller-save registers
 	   might be clobbered.  */
@ -101,7 +105,7 @@ __lll_mutex_timedlock_wait:

 	/* Compute relative timeout.  */
 	movq	8(%rsp), %rax
-	movq	$1000, %rdi
+	movl	$1000, %edi
 	mul	%rdi		/* Milli seconds to nano seconds.  */
 	movq	(%r13), %rdi
 	movq	8(%r13), %rsi
@ -126,9 +130,13 @@ __lll_mutex_timedlock_wait:
 	je	8f

 	movq	%rsp, %r10
-	xorq	%rsi, %rsi	/* movq $FUTEX_WAIT, %rsi */
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
 	movq	%r12, %rdi
-	movq	$SYS_futex, %rax
+	movl	$SYS_futex, %eax
 	syscall
 	movq	%rax, %rcx

@ -195,9 +203,9 @@ __lll_mutex_unlock_wake:
 	pushq	%rdx

 	movl	$0, (%rdi)
-	movq	$FUTEX_WAKE, %rsi
+	movl	$FUTEX_WAKE, %esi
 	movl	$1, %edx	/* Wake one thread.  */
-	movq	$SYS_futex, %rax
+	movl	$SYS_futex, %eax
 	syscall

 	popq	%rdx
@ -222,13 +230,13 @@ __lll_timedwait_tid:

 	/* Get current time.  */
 2:	movq	%rsp, %rdi
-	xorq	%rsi, %rsi
+	xorl	%esi, %esi
 	movq	$VSYSCALL_ADDR_vgettimeofday, %rax
 	callq	*%rax

 	/* Compute relative timeout.  */
 	movq	8(%rsp), %rax
-	movq	$1000, %rdi
+	movl	$1000, %edi
 	mul	%rdi		/* Milli seconds to nano seconds.  */
 	movq	(%r13), %rdi
 	movq	8(%r13), %rsi
@ -248,9 +256,13 @@ __lll_timedwait_tid:
 	jz	4f

 	movq	%rsp, %r10
-	xorq	%rsi, %rsi	/* movq $FUTEX_WAIT, %rsi */
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
 	movq	%r12, %rdi
-	movq	$SYS_futex, %rax
+	movl	$SYS_futex, %eax
 	syscall

 	cmpl	$0, (%rdi)
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -63,9 +63,14 @@ pthread_barrier_wait:

 	/* Wait for the remaining threads.  The call will return immediately
 	   if the CURR_EVENT memory has meanwhile been changed.  */
-7:	xorq	%rsi, %rsi		/* movq $FUTEX_WAIT, %rsi */
+7:
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
 	xorq	%r10, %r10
-8:	movq	$SYS_futex, %rax
+8:	movl	$SYS_futex, %eax
 	syscall

 	/* Don't return on spurious wakeups.  The syscall does not change
@ -110,8 +115,8 @@ pthread_barrier_wait:
 	/* Wake up all waiters.  The count is a signed number in the kernel
 	   so 0x7fffffff is the highest value.  */
 	movl	$0x7fffffff, %edx
-	movq	$FUTEX_WAKE, %rsi
-	movq	$SYS_futex, %rax
+	movl	$FUTEX_WAKE, %esi
+	movl	$SYS_futex, %eax
 	syscall

 	/* Increment LEFT.  If this brings the count back to the
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -81,10 +81,10 @@ __pthread_cond_broadcast:
 	je	9f

 	/* Wake up all threads.  */
-	movq	$FUTEX_CMP_REQUEUE, %rsi
-	movq	$SYS_futex, %rax
+	movl	$FUTEX_CMP_REQUEUE, %esi
+	movl	$SYS_futex, %eax
 	movl	$1, %edx
-	movq	$0x7fffffff, %r10
+	movl	$0x7fffffff, %r10d
 	syscall

 	/* For any kind of error, which mainly is EAGAIN, we try again
@ -128,9 +128,9 @@ __pthread_cond_broadcast:
 	jmp	8b

 9:	/* The futex requeue functionality is not available.  */
-	movq	$0x7fffffff, %rdx
-	movq	$FUTEX_WAKE, %rsi
-	movq	$SYS_futex, %rax
+	movl	$0x7fffffff, %edx
+	movl	$FUTEX_WAKE, %esi
+	movl	$SYS_futex, %eax
 	syscall
 	jmp	10b
 	.size	__pthread_cond_broadcast, .-__pthread_cond_broadcast
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -66,9 +66,9 @@ __pthread_cond_signal:
 	addl	$1, (%rdi)

 	/* Wake up one thread.  */
-	movq	$FUTEX_WAKE, %rsi
-	movq	$SYS_futex, %rax
-	movq	$1, %rdx
+	movl	$FUTEX_WAKE, %esi
+	movl	$SYS_futex, %eax
+	movl	$1, %edx
 	syscall

 	/* Unlock.  */
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -56,7 +56,7 @@ __pthread_cond_timedwait:
 .Lsubq:

 	cmpq	$1000000000, 8(%rdx)
-	movq	$EINVAL, %rax
+	movl	$EINVAL, %eax
 	jae	18f

 	/* Stack frame:
@ -102,7 +102,7 @@ __pthread_cond_timedwait:

 	/* Unlock the mutex.  */
 2:	movq	16(%rsp), %rdi
-	xorq	%rsi, %rsi
+	xorl	%esi, %esi
 	callq	__pthread_mutex_unlock_usercnt

 	testl	%eax, %eax
@ -141,7 +141,7 @@ __pthread_cond_timedwait:
 	/* Only clocks 0 and 1 are allowed so far.  Both are handled in the
 	   kernel.  */
 	leaq	24(%rsp), %rsi
-	movq	$__NR_clock_gettime, %rax
+	movl	$__NR_clock_gettime, %eax
 	syscall
 # ifndef __ASSUME_POSIX_TIMERS
 	cmpq	$-ENOSYS, %rax
@ -155,13 +155,13 @@ __pthread_cond_timedwait:
 	subq	32(%rsp), %rdx
 #else
 	leaq	24(%rsp), %rdi
-	xorq	%rsi, %rsi
+	xorl	%esi, %esi
 	movq	$VSYSCALL_ADDR_vgettimeofday, %rax
 	callq	*%rax

 	/* Compute relative timeout.  */
 	movq	32(%rsp), %rax
-	movq	$1000, %rdx
+	movl	$1000, %edx
 	mul	%rdx		/* Milli seconds to nano seconds.  */
 	movq	(%r13), %rcx
 	movq	8(%r13), %rdx
@ -195,10 +195,14 @@ __pthread_cond_timedwait:
 	movl	%eax, (%rsp)

 	leaq	24(%rsp), %r10
-	xorq	%rsi, %rsi	/* movq $FUTEX_WAIT, %rsi */
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
 	movq	%r12, %rdx
 	addq	$cond_futex, %rdi
-	movq	$SYS_futex, %rax
+	movl	$SYS_futex, %eax
 	syscall
 	movq	%rax, %r14

@ -237,7 +241,7 @@ __pthread_cond_timedwait:

 13:	incq	wakeup_seq(%rdi)
 	incl	cond_futex(%rdi)
-	movq	$ETIMEDOUT, %r14
+	movl	$ETIMEDOUT, %r14d
 	jmp	14f

 23:	xorq	%r14, %r14
@ -256,8 +260,8 @@ __pthread_cond_timedwait:
 	jne	25f

 	addq	$cond_nwaiters, %rdi
-	movq	$SYS_futex, %rax
-	movq	$FUTEX_WAKE, %rsi
+	movl	$SYS_futex, %eax
+	movl	$FUTEX_WAKE, %esi
 	movl	$1, %edx
 	syscall
 	subq	$cond_nwaiters, %rdi
@ -349,13 +353,13 @@ __pthread_cond_timedwait:
 #if defined __NR_clock_gettime && !defined __ASSUME_POSIX_TIMERS
 	/* clock_gettime not available.  */
 19:	leaq	24(%rsp), %rdi
-	xorq	%rsi, %rsi
+	xorl	%esi, %esi
 	movq	$VSYSCALL_ADDR_vgettimeofday, %rax
 	callq	*%rax

 	/* Compute relative timeout.  */
 	movq	32(%rsp), %rax
-	movq	$1000, %rdx
+	movl	$1000, %edx
 	mul	%rdx		/* Milli seconds to nano seconds.  */
 	movq	(%r13), %rcx
 	movq	8(%r13), %rdx
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -82,12 +82,12 @@ __condvar_cleanup:
 	jne	4f

 	addq	$cond_nwaiters, %rdi
-	movq	$SYS_futex, %rax
-	movq	$FUTEX_WAKE, %rsi
+	movl	$SYS_futex, %eax
+	movl	$FUTEX_WAKE, %esi
 	movl	$1, %edx
 	syscall
 	subq	$cond_nwaiters, %rdi
-	movq	$1, %r12
+	movl	$1, %r12d

 4:	LOCK
 #if cond_lock == 0
@ -105,9 +105,9 @@ __condvar_cleanup:
 2:	testq	%r12, %r12
 	jnz	5f
 	addq	$cond_futex, %rdi
-	movq	$FUTEX_WAKE, %rsi
+	movl	$FUTEX_WAKE, %esi
 	movl	$0x7fffffff, %edx
-	movq	$SYS_futex, %rax
+	movl	$SYS_futex, %eax
 	syscall

 5:	movq	16(%r8), %rdi
@ -170,7 +170,7 @@ __pthread_cond_wait:

 	/* Unlock the mutex.  */
 2:	movq	16(%rsp), %rdi
-	xorq	%rsi, %rsi
+	xorl	%esi, %esi
 	callq	__pthread_mutex_unlock_usercnt

 	testl	%eax, %eax
@ -215,8 +215,12 @@ __pthread_cond_wait:
 	xorq	%r10, %r10
 	movq	%r12, %rdx
 	addq	$cond_futex-cond_lock, %rdi
-	movq	$SYS_futex, %rax
-	movq	%r10, %rsi	/* movq $FUTEX_WAIT, %rsi */
+	movl	$SYS_futex, %eax
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
 	syscall

 	movl	(%rsp), %edi
@ -262,8 +266,8 @@ __pthread_cond_wait:
 	jne	17f

 	addq	$cond_nwaiters, %rdi
-	movq	$SYS_futex, %rax
-	movq	$FUTEX_WAKE, %rsi
+	movl	$SYS_futex, %eax
+	movl	$FUTEX_WAKE, %esi
 	movl	$1, %edx
 	syscall
 	subq	$cond_nwaiters, %rdi
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -74,8 +74,12 @@ __pthread_once:
 	jnz	3f	/* Different for generation -> run initializer.  */

 	/* Somebody else got here first.  Wait.  */
-	movq	%r10, %rsi		/* movq $FUTEX_WAIT, %rsi */
-	movq	$SYS_futex, %rax
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
+	movl	$SYS_futex, %eax
 	syscall
 	jmp	6b

@ -98,12 +102,12 @@ __pthread_once:
 	/* Wake up all other threads.  */
 	movl	$0x7fffffff, %edx
 	movl	$FUTEX_WAKE, %esi
-	movq	$SYS_futex, %rax
+	movl	$SYS_futex, %eax
 	syscall

 4:	addq	$8, %rsp
 .Ladd:
-	xorq	%rax, %rax
+	xorl	%eax, %eax
 	retq

 	.size	__pthread_once,.-__pthread_once
@ -124,8 +128,8 @@ clear_once_control:
 	movl	$0, (%rdi)

 	movl	$0x7fffffff, %edx
-	movq	$FUTEX_WAKE, %rsi
-	movq	$SYS_futex, %rax
+	movl	$FUTEX_WAKE, %esi
+	movl	$SYS_futex, %eax
 	syscall

 	movq	%r8, %rdi
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_rdlock.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -74,8 +74,12 @@ __pthread_rwlock_rdlock:
 	jne	10f

 11:	addq	$READERS_WAKEUP, %rdi
-	movq	%r10, %rsi	/* movq $FUTEX_WAIT, %rsi */
-	movq	$SYS_futex, %rax
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
+	movl	$SYS_futex, %eax
 	syscall

 	subq	$READERS_WAKEUP, %rdi
@ -94,7 +98,7 @@ __pthread_rwlock_rdlock:
 13:	decl	READERS_QUEUED(%rdi)
 	jmp	2b

-5:	xorq	%rdx, %rdx
+5:	xorl	%edx, %edx
 	incl	NR_READERS(%rdi)
 	je	8f
 9:	LOCK
@ -122,7 +126,7 @@ __pthread_rwlock_rdlock:
 14:	cmpl	%fs:TID, %eax
 	jne	3b
 	/* Deadlock detected.  */
-	movq	$EDEADLK, %rdx
+	movl	$EDEADLK, %edx
 	jmp	9b

 6:
@ -137,12 +141,12 @@ __pthread_rwlock_rdlock:

 	/* Overflow.  */
 8:	decl	NR_READERS(%rdi)
-	movq	$EAGAIN, %rdx
+	movl	$EAGAIN, %edx
 	jmp	9b

 	/* Overflow.  */
 4:	decl	READERS_QUEUED(%rdi)
-	movq	$EAGAIN, %rdx
+	movl	$EAGAIN, %edx
 	jmp	9b

 10:
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedrdlock.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -90,13 +90,13 @@ pthread_rwlock_timedrdlock:

 	/* Get current time.  */
 11:	movq	%rsp, %rdi
-	xorq	%rsi, %rsi
+	xorl	%esi, %esi
 	movq	$VSYSCALL_ADDR_vgettimeofday, %rax
 	callq	*%rax

 	/* Compute relative timeout.  */
 	movq	8(%rsp), %rax
-	movq	$1000, %rdi
+	movl	$1000, %edi
 	mul	%rdi		/* Milli seconds to nano seconds.  */
 	movq	(%r13), %rcx
 	movq	8(%r13), %rdi
@ -112,11 +112,15 @@ pthread_rwlock_timedrdlock:
 	movq	%rcx, (%rsp)	/* Store relative timeout.  */
 	movq	%rdi, 8(%rsp)

-	xorq	%rsi, %rsi	/* movq $FUTEX_WAIT, %rsi */
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
 	movq	%rsp, %r10
 	movl	%r14d, %edx
 	leaq	READERS_WAKEUP(%r12), %rdi
-	movq	$SYS_futex, %rax
+	movl	$SYS_futex, %eax
 	syscall
 	movq	%rax, %rdx
 17:
@ -136,11 +140,11 @@ pthread_rwlock_timedrdlock:
 	cmpq	$-ETIMEDOUT, %rdx
 	jne	2b

-18:	movq	$ETIMEDOUT, %rdx
+18:	movl	$ETIMEDOUT, %edx
 	jmp	9f


-5:	xorq	%rdx, %rdx
+5:	xorl	%edx, %edx
 	incl	NR_READERS(%r12)
 	je	8f
 9:	LOCK
@ -168,7 +172,7 @@ pthread_rwlock_timedrdlock:

 14:	cmpl	%fs:TID, %eax
 	jne	3b
-	movq	$EDEADLK, %rdx
+	movl	$EDEADLK, %edx
 	jmp	9b

 6:
@ -182,12 +186,12 @@ pthread_rwlock_timedrdlock:

 	/* Overflow.  */
 8:	decl	NR_READERS(%r12)
-	movq	$EAGAIN, %rdx
+	movl	$EAGAIN, %edx
 	jmp	9b

 	/* Overflow.  */
 4:	decl	READERS_QUEUED(%r12)
-	movq	$EAGAIN, %rdx
+	movl	$EAGAIN, %edx
 	jmp	9b

 10:
@ -211,6 +215,6 @@ pthread_rwlock_timedrdlock:
 16:	movq	$-ETIMEDOUT, %rdx
 	jmp	17b

-19:	movq	$EINVAL, %rdx
+19:	movl	$EINVAL, %edx
 	jmp	9b
 	.size	pthread_rwlock_timedrdlock,.-pthread_rwlock_timedrdlock
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_timedwrlock.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -86,13 +86,13 @@ pthread_rwlock_timedwrlock:

 	/* Get current time.  */
 11:	movq	%rsp, %rdi
-	xorq	%rsi, %rsi
+	xorl	%esi, %esi
 	movq	$VSYSCALL_ADDR_vgettimeofday, %rax
 	callq	*%rax

 	/* Compute relative timeout.  */
 	movq	8(%rsp), %rax
-	movq	$1000, %rdi
+	movl	$1000, %edi
 	mul	%rdi		/* Milli seconds to nano seconds.  */
 	movq	(%r13), %rcx
 	movq	8(%r13), %rdi
@ -108,11 +108,15 @@ pthread_rwlock_timedwrlock:
 	movq	%rcx, (%rsp)	/* Store relative timeout.  */
 	movq	%rdi, 8(%rsp)

-	xorq	%rsi, %rsi	/* movq $FUTEX_WAIT, %rsi */
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
 	movq	%rsp, %r10
 	movl	%r14d, %edx
 	leaq	WRITERS_WAKEUP(%r12), %rdi
-	movq	$SYS_futex, %rax
+	movl	$SYS_futex, %eax
 	syscall
 	movq	%rax, %rdx
 17:
@ -132,11 +136,11 @@ pthread_rwlock_timedwrlock:
 	cmpq	$-ETIMEDOUT, %rdx
 	jne	2b

-18:	movq	$ETIMEDOUT, %rdx
+18:	movl	$ETIMEDOUT, %edx
 	jmp	9f


-5:	xorq	%rdx, %rdx
+5:	xorl	%edx, %edx
 	movl	%fs:TID, %eax
 	movl	%eax, WRITER(%r12)
 9:	LOCK
@ -164,7 +168,7 @@ pthread_rwlock_timedwrlock:

 14:	cmpl	%fs:TID, %eax
 	jne	3b
-20:	movq	$EDEADLK, %rdx
+20:	movl	$EDEADLK, %edx
 	jmp	9b

 6:
@ -178,7 +182,7 @@ pthread_rwlock_timedwrlock:

 	/* Overflow.  */
 4:	decl	WRITERS_QUEUED(%r12)
-	movq	$EAGAIN, %rdx
+	movl	$EAGAIN, %edx
 	jmp	9b

 10:
@ -202,6 +206,6 @@ pthread_rwlock_timedwrlock:
 16:	movq	$-ETIMEDOUT, %rdx
 	jmp	17b

-19:	movq	$EINVAL, %rdx
+19:	movl	$EINVAL, %edx
 	jmp	9b
 	.size	pthread_rwlock_timedwrlock,.-pthread_rwlock_timedwrlock
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_unlock.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -56,7 +56,7 @@ __pthread_rwlock_unlock:

 5:	movl	$0, WRITER(%rdi)

-	movq	$1, %rsi
+	movl	$1, %esi
 	leaq	WRITERS_WAKEUP(%rdi), %r10
 	movq	%rsi, %rdx
 	cmpl	$0, WRITERS_QUEUED(%rdi)
@ -78,11 +78,11 @@ __pthread_rwlock_unlock:
 #endif
 	jne	7f

-8:	movq	$SYS_futex, %rax
+8:	movl	$SYS_futex, %eax
 	movq	%r10, %rdi
 	syscall

-	xorq	%rax, %rax
+	xorl	%eax, %eax
 	retq

 	.align	16
@ -94,7 +94,7 @@ __pthread_rwlock_unlock:
 #endif
 	jne	3f

-4:	xorq	%rax, %rax
+4:	xorl	%eax, %eax
 	retq

 1:
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_rwlock_wrlock.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -72,8 +72,12 @@ __pthread_rwlock_wrlock:
 	jne	10f

 11:	addq	$WRITERS_WAKEUP, %rdi
-	movq	%r10, %rsi	/* movq $FUTEX_WAIT, %rsi */
-	movq	$SYS_futex, %rax
+#if FUTEX_WAIT == 0
+	xorl	%esi, %esi
+#else
+	movl	$FUTEX_WAIT, %esi
+#endif
+	movl	$SYS_futex, %eax
 	syscall

 	subq	$WRITERS_WAKEUP, %rdi
@ -92,7 +96,7 @@ __pthread_rwlock_wrlock:
 13:	decl	WRITERS_QUEUED(%rdi)
 	jmp	2b

-5:	xorq	%rdx, %rdx
+5:	xorl	%edx, %edx
 	movl	%fs:TID, %eax
 	movl	%eax, WRITER(%rdi)
 9:	LOCK
@ -119,7 +123,7 @@ __pthread_rwlock_wrlock:

 14:	cmpl	%fs:TID, %eax
 	jne	3b
-	movq	$EDEADLK, %rdx
+	movl	$EDEADLK, %edx
 	jmp	9b

 6:
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_post.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_post.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -41,8 +41,8 @@ sem_post:
 	LOCK
 	xaddl	%edx, (%rdi)

-	movq	$SYS_futex, %rax
-	movq	$FUTEX_WAKE, %rsi
+	movl	$SYS_futex, %eax
+	movl	$FUTEX_WAKE, %esi
 	incl	%edx
 	syscall

--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -82,14 +82,14 @@ sem_timedwait:
 7:	call	__pthread_enable_asynccancel
 	movl	%eax, 16(%rsp)

-	xorq	%rsi, %rsi
+	xorl	%esi, %esi
 	movq	%rsp, %rdi
 	movq	$VSYSCALL_ADDR_vgettimeofday, %rax
 	callq	*%rax

 	/* Compute relative timeout.  */
 	movq	8(%rsp), %rax
-	movq	$1000, %rdi
+	movl	$1000, %edi
 	mul	%rdi		/* Milli seconds to nano seconds.  */
 	movq	(%r13), %rdi
 	movq	8(%r13), %rsi
@ -107,8 +107,8 @@ sem_timedwait:

 	movq	%rsp, %r10
 	movq	%r12, %rdi
-	xorq	%rsi, %rsi
-	movq	$SYS_futex, %rax
+	xorl	%esi, %esi
+	movl	$SYS_futex, %eax
 	xorl	%edx, %edx
 	syscall
 	movq	%rax, %r14
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

@ -77,7 +77,7 @@ sem_wait:
 	movl	%eax, %r8d

 	xorq	%r10, %r10
-	movq	$SYS_futex, %rax
+	movl	$SYS_futex, %eax
 	movq	%r13, %rdi
 	movq	%r10, %rsi
 	movq	%r10, %rdx
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.

@ -48,7 +48,7 @@
    POPARGS_##args							      \
    /* The return value from CENABLE is argument for CDISABLE.  */	      \
    movq %rax, (%rsp);							      \
-    movq $SYS_ify (syscall_name), %rax;					      \
+    movl $SYS_ify (syscall_name), %eax;					      \
    syscall;								      \
    movq (%rsp), %rdi;							      \
    /* Save %rax since it's the error code from the syscall.  */	      \
--- a/sysdeps/ia64/fpu/Makefile
+++ b/sysdeps/ia64/fpu/Makefile
@ -27,7 +27,8 @@ sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \
 		   $(duplicated-routines)

 sysdep-CPPFLAGS += -include libm-symbols.h \
-	-D__POSIX__ \
+	-D__POSIX__ -Dopensource \
 	-D_LIB_VERSIONIMF=_LIB_VERSION \
-	-DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64
+	-DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64 \
+	-DSIZE_LONG_64 -DIA64
 endif
--- a/sysdeps/ia64/fpu/e_acos.S
+++ b/sysdeps/ia64/fpu/e_acos.S
@ -824,6 +824,7 @@ acos_abs_gt_1:
 GLOBAL_LIBM_END(acos)


+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_acosf.S
+++ b/sysdeps/ia64/fpu/e_acosf.S
@ -601,6 +601,7 @@ ACOSF_ABS_ONE:

 GLOBAL_LIBM_END(acosf)

+
 // Stack operations when calling error support.
 //       (1)               (2)
 //   sp   -> +          psp -> +
--- a/sysdeps/ia64/fpu/e_acosh.S
+++ b/sysdeps/ia64/fpu/e_acosh.S
@ -1139,6 +1139,7 @@ ACOSH_LESS_ONE:

 GLOBAL_LIBM_END(acosh)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue

--- a/sysdeps/ia64/fpu/e_acoshf.S
+++ b/sysdeps/ia64/fpu/e_acoshf.S
@ -968,6 +968,7 @@ ACOSH_LESS_ONE:

 GLOBAL_LIBM_END(acoshf)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue

--- a/sysdeps/ia64/fpu/e_acoshl.S
+++ b/sysdeps/ia64/fpu/e_acoshl.S
@ -1650,6 +1650,7 @@ acoshl_lt_pone:
 GLOBAL_LIBM_END(acoshl)


+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_acosl.S
+++ b/sysdeps/ia64/fpu/e_acosl.S
@ -2482,6 +2482,7 @@ acosl_SPECIAL_CASES:

 GLOBAL_LIBM_END(acosl)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 // (1)
--- a/sysdeps/ia64/fpu/e_asin.S
+++ b/sysdeps/ia64/fpu/e_asin.S
@ -800,6 +800,7 @@ asin_abs_gt_1:
 GLOBAL_LIBM_END(asin)


+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_asinf.S
+++ b/sysdeps/ia64/fpu/e_asinf.S
@ -583,6 +583,7 @@ ASINF_ABS_ONE:
 ;;

 GLOBAL_LIBM_END(asinf)
+
 // Stack operations when calling error support.
 //       (1)               (2)                  
 //   sp   -> +          psp -> +               
--- a/sysdeps/ia64/fpu/e_asinl.S
+++ b/sysdeps/ia64/fpu/e_asinl.S
@ -2459,6 +2459,7 @@ SMALL_S:
 GLOBAL_LIBM_END(asinl)


+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 // (1)
--- a/sysdeps/ia64/fpu/e_atan2.S
+++ b/sysdeps/ia64/fpu/e_atan2.S
@ -52,6 +52,7 @@
 // 08/20/02  Corrected inexact flag and directed rounding symmetry bugs
 // 02/06/03  Reordered header: .section, .global, .proc, .align
 // 04/17/03  Added missing mutex directive
+// 12/23/03  atan2(NaN1,NaN2) now QNaN1, for consistency with atan2f, atan2l
 //
 // API
 //==============================================================
@ -142,7 +143,7 @@
 //             -0                -0          -pi
 //
 //            Nan             anything      quiet Y
-//            anything        NaN           quiet X
+//            Not NaN         NaN           quiet X

 // atan2(+-0/+-0) sets double error tag to 37

@ -388,7 +389,7 @@ GLOBAL_IEEE754_ENTRY(atan2)
 }
 { .mfb
           ldfe         atan2_P21  = [EXP_AD_P2],16
-(p10)      fma.d.s0 f8 = atan2_Y,atan2_X,f0   // If y=nan, result quietized y
+(p10)      fma.d.s0 f8 = atan2_X,atan2_Y,f0   // If y=nan, result quietized y
 (p10)      br.ret.spnt b0        // Exit if y=nan
 ;;
 }
@ -985,6 +986,7 @@ ATAN2_ERROR:
 }
 GLOBAL_IEEE754_END(atan2)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 // (1)
--- a/sysdeps/ia64/fpu/e_atan2f.S
+++ b/sysdeps/ia64/fpu/e_atan2f.S
@ -827,6 +827,7 @@ ATAN2F_XY_INF_NAN_ZERO:

 GLOBAL_IEEE754_END(atan2f)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
         mov            GR_Parameter_TAG      = 38
--- a/sysdeps/ia64/fpu/e_atanh.S
+++ b/sysdeps/ia64/fpu/e_atanh.S
@ -1008,6 +1008,7 @@ atanh_ge_one:

 GLOBAL_LIBM_END(atanh)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue

--- a/sysdeps/ia64/fpu/e_atanhf.S
+++ b/sysdeps/ia64/fpu/e_atanhf.S
@ -782,6 +782,7 @@ atanhf_ge_one:

 GLOBAL_LIBM_END(atanhf)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue

--- a/sysdeps/ia64/fpu/e_atanhl.S
+++ b/sysdeps/ia64/fpu/e_atanhl.S
@ -1101,6 +1101,7 @@ atanhl_gt_one:
 };;

 GLOBAL_LIBM_END(atanhl)
+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_cosh.S
+++ b/sysdeps/ia64/fpu/e_cosh.S
@ -811,6 +811,7 @@ COSH_UNORM:

 GLOBAL_IEEE754_END(cosh)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_coshf.S
+++ b/sysdeps/ia64/fpu/e_coshf.S
@ -652,6 +652,7 @@ COSH_UNORM:

 GLOBAL_IEEE754_END(coshf)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_coshl.S
+++ b/sysdeps/ia64/fpu/e_coshl.S
@ -1033,6 +1033,7 @@ COSH_HUGE:

 GLOBAL_IEEE754_END(coshl)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue

--- a/sysdeps/ia64/fpu/e_exp.S
+++ b/sysdeps/ia64/fpu/e_exp.S
@ -1,7 +1,7 @@
 .file "exp.s"


-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (c) 2000 - 2003, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -52,6 +52,7 @@
 // 05/20/02 Cleaned up namespace and sf0 syntax
 // 09/07/02 Force inexact flag
 // 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path
+// 05/30/03 Set inexact flag on unmasked overflow/underflow

 // API
 //==============================================================
@ -602,7 +603,7 @@ EXP_CERTAIN_OVERFLOW:
 }
 { .mfb
      mov             GR_Parameter_TAG = 14
-      fma.d.s0        FR_RESULT = fTmp, fTmp, f0    // Set I,O and +INF result
+      fma.d.s0        FR_RESULT = fTmp, fTmp, fTmp    // Set I,O and +INF result
      br.cond.sptk    __libm_error_region
 }
 ;;
@ -685,6 +686,13 @@ EXP_CERTAIN_UNDERFLOW:
 }
 ;;

+{ .mfi
+      nop.m           0
+      fmerge.se       fTmp = fTmp, fLn2_by_128_lo // Small with signif lsb 1
+      nop.i           0
+}
+;;
+      
 { .mfb
      nop.m           0
      fma.d.s0        f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
@ -730,6 +738,7 @@ EXP_UNDERFLOW_ZERO:

 GLOBAL_IEEE754_END(exp)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_exp10.S
+++ b/sysdeps/ia64/fpu/e_exp10.S
@ -1,7 +1,7 @@
 .file "exp10.s"


-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -43,6 +43,7 @@
 // 05/20/02 Cleaned up namespace and sf0 syntax
 // 09/06/02 Improved performance; no inexact flags on exact cases
 // 01/29/03 Added missing } to bundle templates
+// 12/16/04 Call error handling on underflow.
 //
 // API
 //==============================================================
@ -81,8 +82,8 @@
 // Registers used
 //==============================================================
 // r2-r3, r14-r40
-// f6-f15, f32-f51
-// p6-p9, p12
+// f6-f15, f32-f52
+// p6-p12
 //


@ -104,6 +105,7 @@ GR_EXPMAX           = r24
 GR_BIAS53           = r25

 GR_ROUNDVAL         = r26
+GR_SNORM_LIMIT      = r26
 GR_MASK             = r27
 GR_KF0              = r28
 GR_MASK_low         = r29
@ -161,6 +163,7 @@ FR_E                = f49
 FR_exact_limit      = f50

 FR_int_x            = f51
+FR_SNORM_LIMIT      = f52


 // Data tables
@ -256,8 +259,12 @@ GLOBAL_IEEE754_ENTRY(exp10)
 }
 ;;

-{.mib
+{.mlx
       ldfe FR_LOG2_10= [ GR_COEFF_START ], 16  // load log2(10)*2^(10-63)
+       movl GR_SNORM_LIMIT= 0xc0733a7146f72a41  // Smallest normal threshold
+}
+{.mib
+       nop.m 0
       nop.i 0
 (p12) br.cond.spnt SPECIAL_exp10               // Branch if nan, inf, zero
 }
@ -284,7 +291,7 @@ GLOBAL_IEEE754_ENTRY(exp10)
 ;;

 {.mfi
-       nop.m 0
+       setf.d FR_SNORM_LIMIT= GR_SNORM_LIMIT      // Set smallest normal limit
       fma.s1 FR_L2_10_high= FR_LOG2_10, FR_2P53, f0 // FR_LOG2_10= log2(10)_hi
       nop.i 0
 }
@ -388,6 +395,13 @@ GLOBAL_IEEE754_ENTRY(exp10)
 }
 ;;

+{.mfi
+       nop.m 0
+       fcmp.ge.s1 p11,p0= f8, FR_SNORM_LIMIT      // Test x for normal range
+       nop.i 0
+}
+;;
+
 {.mfi
       nop.m 0
       fma.s1 FR_E= FR_E0, FR_COEFF1, f0          // E= C_1*e
@ -431,10 +445,17 @@ GLOBAL_IEEE754_ENTRY(exp10)
 {.mfb
       nop.m 0
 (p9)  fma.d.s1 f8= FR_P, FR_T, FR_T              // result= T+T*P, exact use s1
-       br.ret.sptk b0                             // return
+ (p11) br.ret.sptk b0                             // return, if result normal
 }
 ;;

+// Here if result in denormal range (and not zero)
+{.mib
+       nop.m 0
+       mov GR_Parameter_TAG= 265
+       br.cond.sptk __libm_error_region           // Branch to error handling
+}
+;;

 SPECIAL_exp10:
 {.mfi
@ -487,53 +508,35 @@ SPECIAL_exp10:

 OUT_RANGE_exp10:

+// underflow: p6= 1
 // overflow: p8= 1

-{.mii
+.pred.rel "mutex",p6,p8
+{.mmi
 (p8)  mov GR_EXPMAX= 0x1fffe
-       nop.i 0
-       nop.i 0
-}
-;;
-
-
-{.mmb
- (p8)  mov GR_Parameter_TAG= 166
- (p8)  setf.exp FR_R= GR_EXPMAX
-       nop.b 999
-}
-;;
-
-{.mfi
-       nop.m 999
- (p8)  fma.d.s0 f8= FR_R, FR_R, f0                // Create overflow
-       nop.i 999
-}
-// underflow: p6= 1
-{.mii
-       nop.m 0
 (p6)  mov GR_EXPMAX= 1
       nop.i 0
 }
 ;;

-{.mmb
-       nop.m 0
- (p6)  setf.exp FR_R= GR_EXPMAX
-       nop.b 999
+{.mii
+       setf.exp FR_R= GR_EXPMAX
+ (p8)  mov GR_Parameter_TAG= 166
+ (p6)  mov GR_Parameter_TAG= 265
 }
 ;;

 {.mfb
-       nop.m 999
- (p6)  fma.d.s0 f8= FR_R, FR_R, f0                // Create underflow
- (p6)  br.ret.sptk b0                  // will not call libm_error for underflow
+       nop.m 0
+       fma.d.s0 f8= FR_R, FR_R, f0                // Create overflow/underflow
+       br.cond.sptk __libm_error_region           // Branch to error handling
 }
 ;;

 GLOBAL_IEEE754_END(exp10)
 weak_alias (exp10, pow10)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)

 .prologue
--- a/sysdeps/ia64/fpu/e_exp10f.S
+++ b/sysdeps/ia64/fpu/e_exp10f.S
@ -1,7 +1,7 @@
 .file "exp10f.s"


-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -43,6 +43,7 @@
 // 05/20/02 Cleaned up namespace and sf0 syntax
 // 09/06/02 Improved performance and accuracy; no inexact flags on exact cases
 // 01/29/03 Added missing } to bundle templates
+// 12/16/04 Call error handling on underflow.
 //
 // API
 //==============================================================
@ -80,8 +81,8 @@
 // Registers used
 //==============================================================
 // r2-r3, r14-r40
-// f6-f15, f32-f51
-// p6-p9, p12
+// f6-f15, f32-f52
+// p6-p12
 //


@ -102,6 +103,7 @@ GR_Fh_ADDR          = r23
 GR_EXPMAX           = r24

 GR_ROUNDVAL         = r26
+GR_SNORM_LIMIT      = r26
 GR_MASK             = r27
 GR_KF0              = r28
 GR_MASK_low         = r29
@ -153,6 +155,7 @@ FR_E                = f49
 FR_exact_limit      = f50

 FR_int_x            = f51
+FR_SNORM_LIMIT      = f52


 // Data tables
@ -246,8 +249,12 @@ GLOBAL_IEEE754_ENTRY(exp10f)
 }
 ;;

-{.mib
+{.mlx
       ldfe FR_LOG2_10= [ GR_COEFF_START ], 16  // load log2(10)*2^(10-63)
+       movl GR_SNORM_LIMIT= 0xc217b818          // Smallest normal threshold
+}
+{.mib
+       nop.m 0
       nop.i 0
 (p12) br.cond.spnt SPECIAL_exp10               // Branch if nan, inf, zero
 }
@ -261,7 +268,7 @@ GLOBAL_IEEE754_ENTRY(exp10f)
 ;;

 {.mfi
-       nop.m 0
+       setf.s FR_SNORM_LIMIT= GR_SNORM_LIMIT      // Set smallest normal limit
 (p8)  fcvt.fx.s1 FR_int_x = f8                   // Convert x to integer
       nop.i 0
 }
@ -335,7 +342,7 @@ GLOBAL_IEEE754_ENTRY(exp10f)

 {.mfb
       ldf8 FR_T_high= [ GR_Fh_ADDR ]            // load T_high= 2^{f_high}
-       nop.f 0
+       fcmp.ge.s1 p11, p0= f8, FR_SNORM_LIMIT    // Test x for normal range
 (p12) br.cond.spnt OUT_RANGE_exp10
 }
 ;;
@ -390,10 +397,17 @@ GLOBAL_IEEE754_ENTRY(exp10f)
 {.mfb
       nop.m 0
 (p9)  fma.s.s1 f8= FR_P, FR_T, FR_T              // result= T+T*P, exact use s1
-       br.ret.sptk b0                             // return
+ (p11) br.ret.sptk b0                             // return, if result normal
 }
 ;;

+// Here if result in denormal range (and not zero)
+{.mib
+       nop.m 0
+       mov GR_Parameter_TAG= 266
+       br.cond.sptk __libm_error_region           // Branch to error handling
+}
+;;

 SPECIAL_exp10:
 {.mfi
@ -446,53 +460,35 @@ SPECIAL_exp10:

 OUT_RANGE_exp10:

+// underflow: p6= 1
 // overflow: p8= 1

-{.mii
+.pred.rel "mutex",p6,p8
+{.mmi
 (p8)  mov GR_EXPMAX= 0x1fffe
-       nop.i 0
-       nop.i 0
-}
-;;
-
-
-{.mmb
- (p8)  mov GR_Parameter_TAG= 167
- (p8)  setf.exp FR_R= GR_EXPMAX
-       nop.b 999
-}
-;;
-
-{.mfi
-       nop.m 999
- (p8)  fma.s.s0 f8= FR_R, FR_R, f0                // Create overflow
-       nop.i 999
-}
-// underflow: p6= 1
-{.mii
-       nop.m 0
 (p6)  mov GR_EXPMAX= 1
       nop.i 0
 }
 ;;

-{.mmb
-       nop.m 0
- (p6)  setf.exp FR_R= GR_EXPMAX
-       nop.b 999
+{.mii
+       setf.exp FR_R= GR_EXPMAX
+ (p8)  mov GR_Parameter_TAG= 167
+ (p6)  mov GR_Parameter_TAG= 266
 }
 ;;

 {.mfb
-       nop.m 999
- (p6)  fma.s.s0 f8= FR_R, FR_R, f0                // Create underflow
- (p6)  br.ret.sptk b0                  // will not call libm_error for underflow
+       nop.m 0
+       fma.s.s0 f8= FR_R, FR_R, f0                // Create overflow/underflow
+       br.cond.sptk __libm_error_region           // Branch to error handling
 }
 ;;

 GLOBAL_IEEE754_END(exp10f)
 weak_alias (exp10f, pow10f)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)

 .prologue
--- a/sysdeps/ia64/fpu/e_exp10l.S
+++ b/sysdeps/ia64/fpu/e_exp10l.S
@ -1,7 +1,7 @@
 .file "exp10l.s"


-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -44,6 +44,7 @@
 // 02/06/03 Reordered header: .section, .global, .proc, .align
 // 05/08/03 Reformatted assembly source; corrected overflow result for round to
 //          -inf and round to zero; exact results now don't set inexact flag
+// 12/16/04 Call error handling on underflow.
 //
 // API
 //==============================================================
@ -79,9 +80,9 @@

 // Registers used
 //==============================================================
-// f6-f15, f32-f62
+// f6-f15, f32-f63
 // r14-r30, r32-r40
-// p6-p8, p12-p14
+// p6-p8, p11-p14
 //


@ -129,6 +130,7 @@
       FR_4        = f60
       FR_28       = f61
       FR_32       = f62
+       FR_SNORM_LIMIT = f63


       GR_ADDR0    = r14
@ -178,6 +180,7 @@ LOCAL_OBJECT_START(poly_coeffs)
       data8 0x3f55d87fe78a6731 // C_5
       data8 0x3f2430912f86c787 // C_6
       data8 0x9257edfe9b5fb698, 0x00003fbf // log2(10)_low (bits 64...127)
+       data8 0x9a1bc98027a81918, 0x0000c00b // Smallest normal threshold
 LOCAL_OBJECT_END(poly_coeffs)


@ -435,7 +438,7 @@ GLOBAL_IEEE754_ENTRY(exp10l)

 {.mmf
       // GR_D_ADDR = pointer to D table
-       add GR_D_ADDR = 2048-64+96+16, GR_ADDR0
+       add GR_D_ADDR = 2048-64+96+32, GR_ADDR0
       // load C_3, C_4
       ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR0 ], 16
       // y = x*log2(10)*2^8
@ -471,7 +474,8 @@ GLOBAL_IEEE754_ENTRY(exp10l)
 }

 {.mfi
-       nop.m 0
+       // load smallest normal limit
+       ldfe FR_SNORM_LIMIT = [ GR_ADDR0 ], 16
       // x>overflow threshold ?
       fcmp.gt.s1 p12, p7 = f8, FR_OF_TEST
       nop.i 0 ;;
@ -596,6 +600,13 @@ GLOBAL_IEEE754_ENTRY(exp10l)
       nop.i 0 ;;
 }

+{.mfi
+       nop.m 0
+       // test if x >= smallest normal limit
+       fcmp.ge.s1 p11, p0 = f8, FR_SNORM_LIMIT
+       nop.i 0 ;;
+}
+
 {.mfi
       nop.m 0
       // P36 = P34+r2*P56
@ -646,9 +657,16 @@ GLOBAL_IEEE754_ENTRY(exp10l)
       // result = T+T*P
 (p14) fma.s0 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST
       // return
-       br.ret.sptk b0 ;;
+ (p11) br.ret.sptk b0 ;;                  // return, if result normal
 }

+// Here if result in denormal range (and not zero)
+{.mib
+       nop.m 0
+       mov GR_Parameter_TAG= 264
+       br.cond.sptk __libm_error_region           // Branch to error handling
+}
+;;

 SPECIAL_EXP10:

@ -703,47 +721,35 @@ SPECIAL_EXP10:

 OUT_RANGE_EXP10:

-{.mii
-       // overflow: p8 = 1
-  (p8) mov GR_CONST1 = 0x1fffe
-       nop.i 0
-       nop.i 0 ;;
-}
-
-{.mmb
-  (p8) mov GR_Parameter_TAG = 165
-  (p8) setf.exp FR_KF0 = GR_CONST1
-       nop.b 999 ;;
-}
-
-{.mfi
-       nop.m 999
-  (p8) fma.s0 f8 = FR_KF0, FR_KF0, f0
-       nop.i 999
-}
-{.mii
-       nop.m 0
 // underflow: p6 = 1
-  (p6) mov GR_CONST1 = 1
-       nop.i 0 ;;
-}
+// overflow: p8 = 1

-{.mmb
-       nop.m 0
-  (p6) setf.exp FR_KF0 = GR_CONST1
-       nop.b 999 ;;
+.pred.rel "mutex",p6,p8
+{.mmi
+  (p8) mov GR_CONST1 = 0x1fffe
+  (p6) mov GR_CONST1 = 1
+       nop.i 0
 }
+;;
+
+{.mii
+       setf.exp FR_KF0 = GR_CONST1
+  (p8) mov GR_Parameter_TAG = 165
+  (p6) mov GR_Parameter_TAG = 264
+}
+;;

 {.mfb
       nop.m 999
-  (p6) fma.s0 f8 = FR_KF0, FR_KF0, f0
-       // will not call libm_error for underflow
-  (p6) br.ret.sptk b0 ;;
+       fma.s0 f8 = FR_KF0, FR_KF0, f0             // Create overflow/underflow
+       br.cond.sptk __libm_error_region           // Branch to error handling
 }
+;;

 GLOBAL_IEEE754_END(exp10l)
 weak_alias (exp10l, pow10l)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 {.mfi
--- a/sysdeps/ia64/fpu/e_exp2.S
+++ b/sysdeps/ia64/fpu/e_exp2.S
@ -495,6 +495,7 @@ OUT_RANGE_exp2:

 GLOBAL_LIBM_END(exp2)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)

 .prologue
--- a/sysdeps/ia64/fpu/e_exp2f.S
+++ b/sysdeps/ia64/fpu/e_exp2f.S
@ -470,6 +470,7 @@ OUT_RANGE_exp2:

 GLOBAL_LIBM_END(exp2f)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)

 .prologue
--- a/sysdeps/ia64/fpu/e_exp2l.S
+++ b/sysdeps/ia64/fpu/e_exp2l.S
@ -747,6 +747,7 @@ OUT_RANGE_exp2l:

 GLOBAL_LIBM_END(exp2l)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 {.mfi
--- a/sysdeps/ia64/fpu/e_expf.S
+++ b/sysdeps/ia64/fpu/e_expf.S
@ -1,7 +1,7 @@
 .file "expf.s"


-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (c) 2000 - 2003, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -52,6 +52,7 @@
 // 09/26/02 support of higher precision inputs added, underflow threshold
 //          corrected
 // 11/15/02 Improved performance on Itanium 2, added possible over/under paths
+// 05/30/03 Set inexact flag on unmasked overflow/underflow
 //
 //
 // API
@ -521,7 +522,7 @@ EXP_CERTAIN_OVERFLOW:
 }
 { .mfb
      mov             GR_Parameter_TAG = 16
-      fma.s.s0        FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+      fma.s.s0        FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result
      br.cond.sptk    __libm_error_region
 }
 ;;
@ -604,6 +605,13 @@ EXP_CERTAIN_UNDERFLOW:
 }
 ;;

+{ .mfi
+      nop.m           0
+      fmerge.se       fTmp = fTmp, f64DivLn2    // Small with non-trivial signif
+      nop.i           0
+}
+;;
+      
 { .mfb
      nop.m           0
      fma.s.s0        f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
@ -649,6 +657,7 @@ EXP_UNDERFLOW_ZERO:

 GLOBAL_IEEE754_END(expf)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_fmod.S
+++ b/sysdeps/ia64/fpu/e_fmod.S
@ -499,6 +499,7 @@ FMOD_Y_ZERO:
 }

 GLOBAL_IEEE754_END(fmod)
+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_fmodf.S
+++ b/sysdeps/ia64/fpu/e_fmodf.S
@ -514,6 +514,7 @@ EXP_ERROR_RETURN:
 }

 GLOBAL_IEEE754_END(fmodf)
+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_fmodl.S
+++ b/sysdeps/ia64/fpu/e_fmodl.S
@ -1,7 +1,7 @@
 .file "fmodl.s"


-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -49,6 +49,7 @@
 // 05/20/02 Cleaned up namespace and sf0 syntax
 // 02/10/03 Reordered header:.section,.global,.proc,.align
 // 04/28/03 Fix: fmod(sNaN, 0) no longer sets errno
+// 11/23/04 Reformatted routine and improved speed
 //
 // API
 //====================================================================
@ -75,9 +76,16 @@
 //
 // Registers used
 //====================================================================
-// Predicate registers: p6-p11
-// General registers:   r2,r29,r32 (ar.pfs), r33-r39
-// Floating point registers: f6-f15
+
+GR_SMALLBIASEXP     = r2
+GR_2P32             = r3
+GR_SMALLBIASEXP     = r20
+GR_ROUNDCONST       = r21
+GR_SIG_B            = r22
+GR_ARPFS            = r23
+GR_TMP1             = r24
+GR_TMP2             = r25
+GR_TMP3             = r26

 GR_SAVE_B0          = r33
 GR_SAVE_PFS         = r34
@ -93,6 +101,30 @@ FR_X             = f10
 FR_Y                = f9
 FR_RESULT           = f8

+FR_ABS_A            = f6
+FR_ABS_B            = f7
+FR_Y_INV            = f10
+FR_SMALLBIAS        = f11
+FR_E0               = f12
+FR_Q                = f13
+FR_E1               = f14
+FR_2P32             = f15
+FR_TMPX             = f32
+FR_TMPY             = f33
+FR_ROUNDCONST       = f34
+FR_QINT             = f35
+FR_QRND24           = f36
+FR_NORM_B           = f37
+FR_TMP              = f38
+FR_TMP2             = f39
+FR_DFLAG            = f40
+FR_Y_INV0           = f41
+FR_Y_INV1           = f42
+FR_Q0               = f43
+FR_Q1               = f44
+FR_QINT_Z           = f45
+FR_QREM             = f46
+FR_B_SGN_A          = f47

 .section .text
 GLOBAL_IEEE754_ENTRY(fmodl)
@ -101,291 +133,306 @@ GLOBAL_IEEE754_ENTRY(fmodl)
 // result in f8

 { .mfi
-  alloc r32=ar.pfs,1,4,4,0
-  // f6=|a|
-  fmerge.s f6=f0,f8
-  mov r2 = 0x0ffdd
+       getf.sig GR_SIG_B = f9
+       // FR_ABS_A = |a|
+       fmerge.s FR_ABS_A = f0, f8
+       mov GR_SMALLBIASEXP = 0x0ffdd
 }
 { .mfi
-  getf.sig r29=f9
-  // f7=|b|
-  fmerge.s f7=f0,f9
-  nop.i 0;;
+       nop.m 0
+       // FR_ABS_B = |b|
+       fmerge.s FR_ABS_B = f0, f9
+       nop.i 0
 }
+;;

 { .mfi
-  setf.exp f11 = r2
+       setf.exp FR_SMALLBIAS = GR_SMALLBIASEXP
       // (1) y0
-  frcpa.s1 f10,p6=f6,f7
-  nop.i 0;;
+       frcpa.s1 FR_Y_INV0, p6 = FR_ABS_A, FR_ABS_B
+       nop.i 0
 }
+;;
+
+{ .mlx
+       nop.m 0
+       movl GR_ROUNDCONST = 0x33a00000
+}
+;;

 // eliminate special cases
 { .mmi
       nop.m 0
       nop.m 0
       // y pseudo-zero ?
-cmp.eq p7,p10=r29,r0;;
+       cmp.eq p7, p10 = GR_SIG_B, r0
 }
+;;

-// Y +-NAN, +-inf, +-0?     p7
+// set p7 if b +/-NAN, +/-inf, +/-0
 { .mfi
-      nop.m 999
+       nop.m 0
 (p10) fclass.m p7, p10 = f9, 0xe7
-      nop.i 999;;
-}
-
-// qnan snan inf norm     unorm 0 -+
-// 1    1    1   0        0     0 11
-// e                      3
-// X +-NAN, +-inf, ?        p9
-
-{ .mfi
-      nop.m 999
-      fclass.m.unc  p9,p11 = f8, 0xe3
-      nop.i 999
-}
-
-// |x| < |y|? Return x p8
-{ .mfi
-      nop.m 999
-(p10)  fcmp.lt.unc.s1 p8,p0 = f6,f7
-      nop.i 999 ;;
-}
-
-  { .mfi
-  mov r2=0x1001f
-  // (2) q0=a*y0
-  (p6) fma.s1 f13=f6,f10,f0
       nop.i 0
-} { .mfi
+}
+;;
+
+{ .mfi
+       mov GR_2P32 = 0x1001f
+       // (2) q0 = a*y0
+ (p6)  fma.s1 FR_Q0 = FR_ABS_A, FR_Y_INV0, f0
+       nop.i 0
+}
+{ .mfi
       nop.m 0
       // (3) e0 = 1 - b * y0
-  (p6) fnma.s1 f12=f7,f10,f1
-  nop.i 0;;
+ (p6)  fnma.s1 FR_E0 = FR_ABS_B, FR_Y_INV0, f1
+       nop.i 0
 }
+;;

-// Y +-NAN, +-inf, +-0?     p7
+// set p9 if a +/-NAN, +/-inf
 { .mfi
-      nop.m 999
+       nop.m 0
+       fclass.m.unc p9, p11 = f8, 0xe3
+       nop.i 0
+}
+       // |a| < |b|? Return a, p8=1
+{ .mfi
+       nop.m 0
+ (p10) fcmp.lt.unc.s1 p8, p0 = FR_ABS_A, FR_ABS_B
+       nop.i 0
+}
+;;
+
+// set p7 if b +/-NAN, +/-inf, +/-0
+{ .mfi
+       nop.m 0
       // pseudo-NaN ?
 (p10) fclass.nm p7, p0 = f9, 0xff
-      nop.i 999
+       nop.i 0
 }
+;;

-// qnan snan inf norm     unorm 0 -+
-// 1    1    1   0        0     0 11
-// e                      3
-// X +-NAN, +-inf, ?        p9
-
+// set p9 if a is +/-NaN, +/-Inf
 { .mfi
-      nop.m 999
+       nop.m 0
 (p11) fclass.nm p9, p0 = f8, 0xff
-      nop.i 999;;
-}
-
-{ .mfi
-  nop.m 0
-  //  y denormal ? set D flag (if |x|<|y|)
-  (p8) fnma.s0 f10=f9,f1,f9
-  nop.i 0;;
-}
-
-
-{.mfi
-  nop.m 0
-  // normalize x (if |x|<|y|)
-  (p8) fma.s0 f8=f8,f1,f0
-  nop.i 0
-}
-{.bbb
-  (p9) br.cond.spnt FMOD_X_NAN_INF
-  (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
-  // if |x|<|y|, return
-  (p8) br.ret.spnt    b0;;
-}
-
-  {.mfi
-  nop.m 0
-  // x denormal ? set D flag
-  fnma.s0 f32=f6,f1,f6
       nop.i 0
 }
 { .mfi
       nop.m 0
-  // y denormal ? set D flag
-  fnma.s0 f33=f7,f1,f7
-  nop.i 0;;
+       // b denormal ? set D flag (if |a|<|b|)
+ (p8)  fnma.s0 FR_DFLAG = f9, f1, f9
+       nop.i 0
 }
+;;

 { .mfi
-  // f15=2^32
-  setf.exp f15=r2
+       // FR_2P32 = 2^32
+       setf.exp FR_2P32 = GR_2P32
       // (4) q1 = q0+e0*q0
-  (p6) fma.s1 f13=f12,f13,f13
+ (p6)  fma.s1 FR_Q1 = FR_E0, FR_Q0, FR_Q0
       nop.i 0
 }
 { .mfi
       nop.m 0
       // (5) e1 = e0 * e0 + 2^-34
-  (p6) fma.s1 f14=f12,f12,f11
-  nop.i 0;;
+ (p6)  fma.s1 FR_E1 = FR_E0, FR_E0, FR_SMALLBIAS
+       nop.i 0
 }
-{.mlx
+;;
+
+{ .mfi
       nop.m 0
-  movl r2=0x33a00000;;
+       // normalize a (if |a|<|b|)
+ (p8)  fma.s0 f8 = f8, f1, f0
+       nop.i 0
 }
+{ .bbb
+ (p9) br.cond.spnt FMOD_A_NAN_INF
+ (p7) br.cond.spnt FMOD_B_NAN_INF_ZERO
+       // if |a|<|b|, return
+ (p8) br.ret.spnt b0
+}
+;;
+
+
 { .mfi
       nop.m 0
       // (6) y1 = y0 + e0 * y0
-  (p6) fma.s1 f10=f12,f10,f10
-  nop.i 0;;
-}
-{.mfi
-  // set f12=1.25*2^{-24}
-  setf.s f12=r2
-  // (7) q2=q1+e1*q1
-  (p6) fma.s1 f13=f13,f14,f13
-  nop.i 0;;
+ (p6)  fma.s1 FR_Y_INV1 = FR_E0, FR_Y_INV0, FR_Y_INV0
+       nop.i 0
 }
+;;
+
 { .mfi
       nop.m 0
-  fmerge.s f9=f8,f9
+       // a denormal ? set D flag
+       // b denormal ? set D flag
+       fcmp.eq.s0 p12,p0 = FR_ABS_A, FR_ABS_B
+       nop.i 0
+}
+{ .mfi
+       // set FR_ROUNDCONST = 1.25*2^{-24}
+       setf.s FR_ROUNDCONST = GR_ROUNDCONST
+       // (7) q2 = q1+e1*q1
+ (p6)  fma.s1 FR_Q = FR_Q1, FR_E1, FR_Q1
+       nop.i 0
+}
+;;
+
+{ .mfi
+       nop.m 0
+       fmerge.s FR_B_SGN_A = f8, f9
       nop.i 0
 }
 { .mfi
       nop.m 0
       // (8) y2 = y1 + e1 * y1
-  (p6) fma.s1 f10=f14,f10,f10
+ (p6)  fma.s1 FR_Y_INV = FR_E1, FR_Y_INV1, FR_Y_INV1
       // set p6 = 0, p10 = 0
-  cmp.ne.and p6,p10=r0,r0;;
+       cmp.ne.and p6, p10 = r0, r0
 }
+;;

-
+//   will compute integer quotient bits (24 bits per iteration)
 .align 32
 loop64:
 { .mfi
       nop.m 0
       // compare q2, 2^32
-  fcmp.lt.unc.s1 p8,p7=f13,f15
+       fcmp.lt.unc.s1 p8, p7 = FR_Q, FR_2P32
       nop.i 0
 }
 { .mfi
       nop.m 0
       // will truncate quotient to integer, if exponent<32 (in advance)
-  fcvt.fx.trunc.s1 f11=f13
-  nop.i 0;;
+       fcvt.fx.trunc.s1 FR_QINT = FR_Q
+       nop.i 0
 }
-  {.mfi
-  nop.m 0
-  // if exponent>32, round quotient to single precision (perform in advance)
-  fma.s.s1 f13=f13,f1,f0
-  nop.i 0;;
-}
-
+;;

 { .mfi
       nop.m 0
-  // set f12=sgn(a)
-  (p8) fmerge.s f12=f8,f1
+       // if exponent>32 round quotient to single precision (perform in advance)
+       fma.s.s1 FR_QRND24 = FR_Q, f1, f0
+       nop.i 0
+}
+;;
+
+{ .mfi
+       nop.m 0
+       // set FR_ROUNDCONST = sgn(a)
+ (p8)  fmerge.s FR_ROUNDCONST = f8, f1
       nop.i 0
 }
 { .mfi
       nop.m 0
       // normalize truncated quotient
-  (p8) fcvt.xf f13=f11
-  nop.i 0;;
+ (p8)  fcvt.xf FR_QRND24 = FR_QINT
+       nop.i 0
 }
+;;
+
 { .mfi
       nop.m 0
-  // calculate remainder (assuming f13=RZ(Q))
-  (p7) fnma.s1 f14=f13,f7,f6
+       // calculate remainder (assuming FR_QRND24 = RZ(Q))
+ (p7)  fnma.s1 FR_E1 = FR_QRND24, FR_ABS_B, FR_ABS_A
       nop.i 0
 }
 { .mfi
       nop.m 0
       // also if exponent>32, round quotient to single precision
       // and subtract 1 ulp: q = q-q*(1.25*2^{-24})
-  (p7) fnma.s.s1 f11=f13,f12,f13
-  nop.i 0;;
+ (p7)  fnma.s.s1 FR_QINT_Z = FR_QRND24, FR_ROUNDCONST, FR_QRND24
+       nop.i 0
 }
+;;

 { .mfi
       nop.m 0
       // (p8) calculate remainder (82-bit format)
-  (p8) fnma.s1 f11=f13,f7,f6
+ (p8)  fnma.s1 FR_QREM = FR_QRND24, FR_ABS_B, FR_ABS_A
       nop.i 0
 }
 { .mfi
       nop.m 0
-  // (p7) calculate remainder (assuming f11=RZ(Q))
-  (p7) fnma.s1 f6=f11,f7,f6
-  nop.i 0;;
+       // (p7) calculate remainder (assuming FR_QINT_Z = RZ(Q))
+ (p7)  fnma.s1 FR_ABS_A = FR_QINT_Z, FR_ABS_B, FR_ABS_A
+       nop.i 0
 }
-
+;;

 { .mfi
       nop.m 0
-  // Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ?
-  (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
-  nop.i 0;;
+       // Final iteration (p8): is FR_ABS_A the correct remainder 
+       // (quotient was not overestimated) ?
+ (p8)  fcmp.lt.unc.s1 p6, p10 = FR_QREM, f0
+       nop.i 0
 }
+;;
+
 { .mfi
       nop.m 0
       // get new quotient estimation: a'*y2
-  (p7) fma.s1 f13=f14,f10,f0
+ (p7)  fma.s1 FR_Q = FR_E1, FR_Y_INV, f0
       nop.i 0
 }
 { .mfb
       nop.m 0
-  // was f13=RZ(Q) ? (then new remainder f14>=0)
-  (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
-  nop.b 0;;
+       // was FR_QRND24 = RZ(Q) ? (then new remainder FR_E1 >= 0)
+ (p7)  fcmp.lt.unc.s1 p7, p9 = FR_E1, f0
+       nop.b 0
 }
-
+;;

 .pred.rel "mutex", p6, p10
 { .mfb
       nop.m 0
-  // add b to estimated remainder (to cover the case when the quotient was overestimated)
-  // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
-  (p6) fma.s0 f8=f11,f12,f9
+       // add b to estimated remainder (to cover the case when the quotient was
+       // overestimated)
+       // also set correct sign by using 
+       // FR_B_SGN_A = |b|*sgn(a), FR_ROUNDCONST = sgn(a)
+ (p6)  fma.s0 f8 = FR_QREM, FR_ROUNDCONST, FR_B_SGN_A
       nop.b 0
 }
 { .mfb
       nop.m 0
-  // set correct sign of result before returning: f12=sgn(a)
-  (p10) fma.s0 f8=f11,f12,f0
-  (p8) br.ret.sptk b0;;
+       // set correct sign of result before returning: FR_ROUNDCONST = sgn(a)
+ (p10) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, f0
+ (p8)  br.ret.sptk b0
 }
+;;
+
 { .mfi
       nop.m 0
       // if FR_QRND24 != RZ(Q), get alternative quotient estimation: a''*y2
-  (p7) fma.s1 f13=f6,f10,f0
+ (p7)  fma.s1 FR_Q = FR_ABS_A, FR_Y_INV, f0
       nop.i 0
 }
 { .mfb
       nop.m 0
-  // if f14 was RZ(Q), set remainder to f14
-  (p9) mov f6=f14
-  br.cond.sptk loop64;;
+       // if FR_E1 was RZ(Q), set remainder to FR_E1
+ (p9)  fma.s1 FR_ABS_A = FR_E1, f1, f0
+       br.cond.sptk loop64
 }
+;;

+FMOD_A_NAN_INF:

-
-FMOD_X_NAN_INF:
-
-// Y zero ?
+// b zero ?
 { .mfi
       nop.m 0
-  fclass.m p10,p0=f8,0xc3     // Test x=nan
+       fclass.m p10, p0 = f8, 0xc3 // Test a = nan
       nop.i 0
 }
 { .mfi
       nop.m 0
-  fma.s1 f10=f9,f1,f0
-  nop.i 0;;
+       fma.s1 FR_NORM_B = f9, f1, f0
+       nop.i 0
 }
+;;

 { .mfi
       nop.m 0
@ -395,97 +442,116 @@ FMOD_X_NAN_INF:
 { .mfi
       nop.m 0
 (p10) fclass.m p10, p0 = f9, 0x07 // Test a = nan, and b = zero
-  nop.i 0;;
+       nop.i 0
 }
+;;
+
 { .mfb
       nop.m 0
- fcmp.eq.unc.s1 p11,p0=f10,f0
-(p10) br.ret.spnt b0;;        // Exit with result=x if x=nan and y=zero
+       fcmp.eq.unc.s1 p11, p0 = FR_NORM_B, f0
+ (p10) br.ret.spnt b0 // Exit with result = a if a = nan and b = zero
 }
+;;
+
 { .mib
       nop.m 0
       nop.i 0
       // if b zero
-  (p11) br.cond.spnt FMOD_Y_ZERO;;
+ (p11) br.cond.spnt FMOD_B_ZERO
 }
+;;

-// X infinity? Return QNAN indefinite
+// a= infinity? Return QNAN indefinite
 { .mfi
       // set p7 to 0
       cmp.ne p7, p0 = r0, r0
       fclass.m.unc p8, p9 = f8, 0x23
-     nop.i 999;;
+       nop.i 0
 }
-// Y NaN ?
+;;
+
+// b NaN ?
 { .mfi
-     nop.m 999
+       nop.m 0
 (p8)  fclass.m p9, p8 = f9, 0xc3
-     nop.i 0;;
+       nop.i 0
 }
-// Y not pseudo-zero ? (r29 holds significand)
+;;
+
+// b not pseudo-zero ? (GR_SIG_B holds significand)
 { .mii
-     nop.m 999
-(p8) cmp.ne p7,p0=r29,r0
-     nop.i 0;;
+       nop.m 0
+ (p8)  cmp.ne p7, p0 = GR_SIG_B, r0
+       nop.i 0
 }
+;;
+
 { .mfi
-    nop.m 999
+       nop.m 0
 (p8)  frcpa.s0 f8, p0 = f8, f8
       nop.i 0
 }
 { .mfi
-     nop.m 999
+       nop.m 0
       // also set Denormal flag if necessary
 (p7)  fnma.s0 f9 = f9, f1, f9
-     nop.i 999 ;;
+       nop.i 0
 }
+;;

 { .mfb
-      nop.m 999
+       nop.m 0
 (p8)  fma.s0 f8 = f8, f1, f0
-      nop.b 999 ;;
+       nop.b 0
 }
+;;

 { .mfb
-      nop.m 999
+       nop.m 0
 (p9)  frcpa.s0 f8, p7 = f8, f9
-      br.ret.sptk    b0 ;;
+       br.ret.sptk b0
 }
+;;

-
-FMOD_Y_NAN_INF_ZERO:
-// Y INF
+FMOD_B_NAN_INF_ZERO:
+// b INF
 { .mfi
-      nop.m 999
+       nop.m 0
       fclass.m.unc p7, p0 = f9, 0x23
-      nop.i 999 ;;
+       nop.i 0
 }
+;;

 { .mfb
-      nop.m 999
+       nop.m 0
 (p7)  fma.s0 f8 = f8, f1, f0
-(p7)  br.ret.spnt    b0 ;;
+ (p7)  br.ret.spnt b0
 }
+;;

-// Y NAN?
+// b NAN?
 { .mfi
-      nop.m 999
+       nop.m 0
       fclass.m.unc p9, p10 = f9, 0xc3
-      nop.i 999 ;;
+       nop.i 0
 }
+;;
+
 { .mfi
-      nop.m 999
+       nop.m 0
 (p10) fclass.nm p9, p0 = f9, 0xff
-      nop.i 999 ;;
+       nop.i 0
 }
+;;

 { .mfb
-      nop.m 999
+       nop.m 0
 (p9)  fma.s0 f8 = f9, f1, f0
-(p9)  br.ret.spnt    b0 ;;
+ (p9)  br.ret.spnt b0
 }
+;;

-FMOD_Y_ZERO:
+FMOD_B_ZERO:
 // b zero? Must be zero at this point
 // because it is the only choice left.
 // Return QNAN indefinite
@ -493,49 +559,54 @@ FMOD_Y_ZERO:
 { .mfi
       nop.m 0
       // set Invalid
-  frcpa.s0 f12,p0=f0,f0
+       frcpa.s0 FR_TMP, p0 = f0, f0
       nop.i 0
 }
-// X NAN?
+;;
+
+// a NAN?
 { .mfi
-      nop.m 999
+       nop.m 0
       fclass.m.unc p9, p10 = f8, 0xc3
-      nop.i 999 ;;
+       nop.i 0
 }
+;;
+
 { .mfi
-      nop.m 999
+       alloc GR_ARPFS = ar.pfs, 1, 4, 4, 0
 (p10) fclass.nm p9, p10 = f8, 0xff
-      nop.i 999 ;;
+       nop.i 0
 }
+;;

 { .mfi
- nop.m 999
- (p9) frcpa.s0 f11,p7=f8,f0
- nop.i 0;;
+       nop.m 0
+ (p9)  frcpa.s0 FR_TMP2, p7 = f8, f0
+       nop.i 0
 }
-
+;;

 { .mfi
-      nop.m 999
-(p10) frcpa.s0  f11,p7 = f9,f9
-      mov    GR_Parameter_TAG = 120 ;;
+       nop.m 0
+ (p10) frcpa.s0 FR_TMP2, p7 = f9, f9
+       mov GR_Parameter_TAG = 120
 }
+;;

 { .mfi
-      nop.m 999
-      fmerge.s      f10 = f8, f8
-      nop.i 999
+       nop.m 0
+       fmerge.s FR_X = f8, f8
+       nop.i 0
 }
-
 { .mfb
-      nop.m 999
-      fma.s0 f8=f11,f1,f0
-      br.sptk __libm_error_region;;
+       nop.m 0
+       fma.s0 f8 = FR_TMP2, f1, f0
+       br.sptk __libm_error_region
 }
+;;

 GLOBAL_IEEE754_END(fmodl)

-
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
@ -549,13 +620,17 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
       add sp = -64, sp             // Create new stack
       nop.f 0
       mov GR_SAVE_GP = gp          // Save gp
-};;
+}
+;;
+
 { .mmi
       stfe [ GR_Parameter_Y ] = FR_Y, 16 // Save Parameter 2 on stack
       add GR_Parameter_X = 16, sp  // Parameter 1 address
 .save b0, GR_SAVE_B0
       mov GR_SAVE_B0 = b0          // Save b0
-};;
+}
+;;
+
 .body
 { .mib
       stfe [ GR_Parameter_X ] = FR_X // Store Parameter 1 on stack
@ -566,30 +641,32 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
       stfe [ GR_Parameter_Y ] = FR_RESULT // Store Parameter 3 on stack
       add GR_Parameter_Y = -16, GR_Parameter_Y
       br.call.sptk b0 = __libm_error_support# // Call error handling function
-};;
+}
+;;
+
 { .mmi
       nop.m 0
       nop.m 0
       add GR_Parameter_RESULT = 48, sp
-};;
+}
+;;
+
 { .mmi
       ldfe f8 = [ GR_Parameter_RESULT ] // Get return result off stack
 .restore sp
       add sp = 64, sp                   // Restore stack pointer
       mov b0 = GR_SAVE_B0               // Restore return address
-};;
+}
+;;
+
 { .mib
       mov gp = GR_SAVE_GP               // Restore gp
       mov ar.pfs = GR_SAVE_PFS          // Restore ar.pfs
       br.ret.sptk b0                    // Return
-};;
+}
+;;

 LOCAL_LIBM_END(__libm_error_region)

-
-
-
 .type __libm_error_support#, @function
 .global __libm_error_support#
-
-
--- a/sysdeps/ia64/fpu/e_hypot.S
+++ b/sysdeps/ia64/fpu/e_hypot.S
@ -106,6 +106,7 @@ FR_RESULT           = f8

 LOCAL_LIBM_ENTRY(cabs)
 LOCAL_LIBM_END(cabs)
+
 GLOBAL_IEEE754_ENTRY(hypot)

 {.mfi
@ -384,6 +385,7 @@ GLOBAL_IEEE754_ENTRY(hypot)
 (p9) br.ret.sptk b0;; 
 }
 GLOBAL_IEEE754_END(hypot)
+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_hypotf.S
+++ b/sysdeps/ia64/fpu/e_hypotf.S
@ -106,6 +106,7 @@ FR_RESULT           = f8

 LOCAL_LIBM_ENTRY(cabsf)
 LOCAL_LIBM_END(cabsf)
+
 GLOBAL_IEEE754_ENTRY(hypotf)
 {.mfi
  alloc r32= ar.pfs,0,4,4,0
@ -337,6 +338,7 @@ GLOBAL_IEEE754_ENTRY(hypotf)
 (p9) br.ret.sptk b0;; 
 }
 GLOBAL_IEEE754_END(hypotf)
+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mii
--- a/sysdeps/ia64/fpu/e_hypotl.S
+++ b/sysdeps/ia64/fpu/e_hypotl.S
@ -105,6 +105,7 @@ FR_RESULT           = f8

 LOCAL_LIBM_ENTRY(cabsl)
 LOCAL_LIBM_END(cabsl)
+
 GLOBAL_IEEE754_ENTRY(hypotl)
 {.mfi
  alloc r32= ar.pfs,0,4,4,0
@ -421,6 +422,7 @@ GLOBAL_IEEE754_ENTRY(hypotl)
 (p9) br.ret.sptk b0;; 
 }
 GLOBAL_IEEE754_END(hypotl)
+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_lgamma_r.c
+++ b/sysdeps/ia64/fpu/e_lgamma_r.c
@ -1,5 +1,6 @@
 /* file: lgamma_r.c */

+
 // Copyright (c) 2002 Intel Corporation
 // All rights reserved.
 //
@ -20,7 +21,6 @@
 // products derived from this software without specific prior written
 // permission.

-// WARRANTY DISCLAIMER
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
--- a/sysdeps/ia64/fpu/e_lgammaf_r.c
+++ b/sysdeps/ia64/fpu/e_lgammaf_r.c
@ -1,5 +1,6 @@
 /* file: lgammaf_r.c */

+
 // Copyright (c) 2002 Intel Corporation
 // All rights reserved.
 //
@ -20,7 +21,6 @@
 // products derived from this software without specific prior written
 // permission.

-// WARRANTY DISCLAIMER
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
--- a/sysdeps/ia64/fpu/e_lgammal_r.c
+++ b/sysdeps/ia64/fpu/e_lgammal_r.c
@ -1,5 +1,6 @@
 /* file: lgammal_r.c */

+
 // Copyright (c) 2002 Intel Corporation
 // All rights reserved.
 //
@ -20,7 +21,6 @@
 // products derived from this software without specific prior written
 // permission.

-// WARRANTY DISCLAIMER
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
--- a/sysdeps/ia64/fpu/e_log.S
+++ b/sysdeps/ia64/fpu/e_log.S
@ -1386,6 +1386,7 @@ GLOBAL_IEEE754_ENTRY(log10)
 };;
 GLOBAL_IEEE754_END(log10)

+
 GLOBAL_IEEE754_ENTRY(log)
 { .mfi
      getf.exp      GR_Exp = f8 // if x is unorm then must recompute
@ -1667,6 +1668,7 @@ log_libm_err:
 };;
 GLOBAL_IEEE754_END(log)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_log2.S
+++ b/sysdeps/ia64/fpu/e_log2.S
@ -655,6 +655,7 @@ SPECIAL_LOG2:

 GLOBAL_LIBM_END(log2)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_log2f.S
+++ b/sysdeps/ia64/fpu/e_log2f.S
@ -493,6 +493,7 @@ SPECIAL_log2f:

 GLOBAL_LIBM_END(log2f)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_log2l.S
+++ b/sysdeps/ia64/fpu/e_log2l.S
@ -761,6 +761,7 @@ LOG2_PSEUDO_ZERO:

 GLOBAL_IEEE754_END(log2l)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_logf.S
+++ b/sysdeps/ia64/fpu/e_logf.S
@ -841,6 +841,7 @@ GLOBAL_IEEE754_ENTRY(log10f)
      br.cond.sptk  logf_log10f_common
 };;
 GLOBAL_IEEE754_END(log10f)
+
 GLOBAL_IEEE754_ENTRY(logf)
 { .mfi
      getf.exp      GR_Exp = f8 // if x is unorm then must recompute
@ -1087,6 +1088,7 @@ logf_libm_err:
 };;
 GLOBAL_IEEE754_END(logf)

+
 // Stack operations when calling error support.
 //       (1)               (2)                          (3) (call)              (4)
 //   sp   -> +          psp -> +                     psp -> +                   sp -> +
--- a/sysdeps/ia64/fpu/e_logl.S
+++ b/sysdeps/ia64/fpu/e_logl.S
@ -634,6 +634,7 @@ GLOBAL_IEEE754_ENTRY(logl)

 GLOBAL_IEEE754_END(logl)

+
 GLOBAL_IEEE754_ENTRY(log10l)
 { .mfi
      alloc r32 = ar.pfs,0,21,4,0
@ -1144,6 +1145,7 @@ LOGL_64_negative:


 GLOBAL_IEEE754_END(log10l)
+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_pow.S
+++ b/sysdeps/ia64/fpu/e_pow.S
@ -2234,6 +2234,7 @@ POW_OVER_UNDER_ERROR:

 GLOBAL_LIBM_END(pow)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)

 .prologue
--- a/sysdeps/ia64/fpu/e_powf.S
+++ b/sysdeps/ia64/fpu/e_powf.S
--- a/sysdeps/ia64/fpu/e_powl.S
+++ b/sysdeps/ia64/fpu/e_powl.S
@ -60,6 +60,7 @@
 // 02/10/03 Reordered header: .section, .global, .proc, .align;
 //          used data8 for long double table values
 // 04/17/03 Added missing mutex directive
+// 10/13/03 Corrected .endp names to match .proc names
 //
 //*********************************************************************
 //
@ -2755,6 +2756,7 @@ POWL_64_SQRT:

 GLOBAL_LIBM_END(powl)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
@ -2803,6 +2805,6 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
        br.ret.sptk     b0                     // Return
 };;

-.endp
+LOCAL_LIBM_END(__libm_error_region#)
 .type   __libm_error_support#,@function
 .global __libm_error_support#
--- a/sysdeps/ia64/fpu/e_remainder.S
+++ b/sysdeps/ia64/fpu/e_remainder.S
@ -531,6 +531,7 @@ EXP_ERROR_RETURN:
 GLOBAL_IEEE754_END(remainder)


+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_remainderf.S
+++ b/sysdeps/ia64/fpu/e_remainderf.S
@ -550,6 +550,7 @@ EXP_ERROR_RETURN:
 GLOBAL_IEEE754_END(remainderf)


+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_remainderl.S
+++ b/sysdeps/ia64/fpu/e_remainderl.S
@ -557,6 +557,7 @@ EXP_ERROR_RETURN:
 }

 GLOBAL_IEEE754_END(remainderl)
+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_scalb.S
+++ b/sysdeps/ia64/fpu/e_scalb.S
@ -43,6 +43,7 @@
 // 01/26/01 Scalb completely reworked and now standalone version
 // 05/20/02 Cleaned up namespace and sf0 syntax
 // 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/06/03 Improved performance
 //
 // API
 //==============================================================
@ -50,31 +51,52 @@
 // input  floating point f8 and floating point f9
 // output floating point f8
 //
+// int_type = 0 if int is 32 bits
+// int_type = 1 if int is 64 bits
+//
 // Returns x* 2**n using an fma and detects overflow
 // and underflow.
 //
 //
+// Strategy:
+//  Compute biased exponent of result exp_Result = N + exp_X
+//  Break into ranges:
+//   exp_Result > 0x103fe                 -> Certain overflow
+//   exp_Result = 0x103fe                 -> Possible overflow
+//   0x0fc01 <= exp_Result < 0x103fe      -> No over/underflow (main path)
+//   0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow
+//   exp_Result < 0x0fc01 - 52            -> Certain underflow

+FR_Big         = f6
+FR_NBig        = f7
 FR_Floating_X  = f8
 FR_Result      = f8
 FR_Floating_N  = f9
 FR_Result2     = f9
-FR_Norm_N      = f10
-FR_Result3     = f11
-FR_Norm_X      = f12
+FR_Result3     = f10
+FR_Norm_X      = f11
+FR_Two_N       = f12
 FR_N_float_int = f13
-FR_Two_N       = f14
-FR_Two_to_Big  = f15
-FR_Big         = f6
-FR_NBig        = f7
+FR_Norm_N      = f14

+GR_neg_ov_limit= r14
+GR_big_exp     = r14
 GR_N_Biased    = r15
 GR_Big         = r16
-GR_NBig        = r17
-GR_Scratch     = r18
-GR_Scratch1    = r19
+GR_exp_Result  = r18
+GR_pos_ov_limit= r19
+GR_exp_sure_ou = r19
 GR_Bias        = r20
 GR_N_as_int    = r21
+GR_signexp_X   = r22
+GR_exp_X       = r23
+GR_exp_mask    = r24
+GR_max_exp     = r25
+GR_min_exp     = r26
+GR_min_den_exp = r27
+GR_Scratch     = r28
+GR_signexp_N   = r29
+GR_exp_N       = r30

 GR_SAVE_B0          = r32
 GR_SAVE_GP          = r33
@ -89,163 +111,156 @@ GLOBAL_IEEE754_ENTRY(scalb)

 //
 //   Is x NAN, INF, ZERO, +-?
-//
-{    .mfi
-     alloc          r32=ar.pfs,0,3,4,0
-     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
-     addl  GR_Scratch  = 0x019C3F,r0 
-}
-//
-//   Is y a NAN, INF, ZERO, +-?
-//
-{    .mfi
-     nop.m 999
-     fclass.m.unc  p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf |  @zero
-     addl  GR_Scratch1  = 0x063BF,r0 
-}
-;;
-
-//
-//   Convert N to a fp integer
-//   Normalize x
-//
-{    .mfi
-     nop.m 0
-     fnorm.s1  FR_Norm_N  =   FR_Floating_N 
-     nop.i 999
-}
-{    .mfi
-     nop.m 999
-     fnorm.s1  FR_Norm_X  =   FR_Floating_X 
-     nop.i 999
-};;
-
-//
-//   Create 2*big
-//   Create 2**-big 
-//   Normalize x
-//   Branch on special values.
-//
-{ .mib
-     setf.exp      FR_Big = GR_Scratch                  
-     nop.i 0 
-(p6) br.cond.spnt  SCALB_NAN_INF_ZERO 
-}
-{ .mib
-     setf.exp      FR_NBig = GR_Scratch1                  
-     nop.i 0 
-(p7) br.cond.spnt  SCALB_NAN_INF_ZERO 
-};;
-
-//
-//   Convert N to a fp integer
-//   Create -35000
-//  
-{    .mfi
-     addl  GR_Scratch = 1,r0
-     fcvt.fx.trunc.s1   FR_N_float_int = FR_Norm_N 
-     addl    GR_NBig = -35000,r0
-}
-;;
-
-//
-//   Put N if a GP register
-//   Convert  N_float_int to floating point value
-//   Create 35000
 //   Build the exponent Bias
 //
-{    .mii
-     getf.sig     GR_N_as_int = FR_N_float_int
-     shl   GR_Scratch = GR_Scratch,63
-     addl  GR_Big = 35000,r0
+{    .mfi
+     getf.exp      GR_signexp_N = FR_Floating_N // Get signexp of n
+     fclass.m      p6,p0 = FR_Floating_X, 0xe7  // @snan | @qnan | @inf | @zero
+     mov           GR_Bias = 0x0ffff
 }
 {    .mfi
-     addl GR_Bias = 0x0FFFF,r0
-     fcvt.xf  FR_N_float_int = FR_N_float_int
+     mov           GR_Big = 35000      // If N this big then certain overflow
+     fcvt.fx.trunc.s1   FR_N_float_int = FR_Floating_N // Get N in significand
     nop.i         0
-};;
+}
+;;
+
+{    .mfi
+     getf.exp      GR_signexp_X = FR_Floating_X // Get signexp of x
+     fclass.m      p7,p0 = FR_Floating_N, 0x0b  // Test for n=unorm
+     nop.i         0
+}
+//
+//   Normalize n
+//
+{    .mfi
+     mov           GR_exp_mask = 0x1ffff     // Exponent mask
+     fnorm.s1      FR_Norm_N = FR_Floating_N
+     nop.i         0
+}
+;;

 //
-//   Catch those fp values that are beyond 2**64-1
-//   Is N > 35000     
-//   Is N < -35000     
+//   Is n NAN, INF, ZERO, +-?
 //
 {    .mfi
-     cmp.ne.unc  p9,p10 = GR_N_as_int,GR_Scratch
-     nop.f 0
-     nop.i 0
+     mov           GR_big_exp = 0x1003e      // Exponent at which n is integer
+     fclass.m      p9,p0 = FR_Floating_N, 0xe7  // @snan | @qnan | @inf | @zero
+     mov           GR_max_exp = 0x103fe      // Exponent of maximum double
 }
+//
+//   Normalize x
+//
+{ .mfb
+     nop.m         0
+     fnorm.s1      FR_Norm_X = FR_Floating_X
+(p7) br.cond.spnt  SCALB_N_UNORM             // Branch if n=unorm
+}
+;;
+
+SCALB_COMMON1:
+// Main path continues.  Also return here from n=unorm path.
+//   Handle special cases if x = Nan, Inf, Zero
+{ .mfb
+     nop.m         0
+     fcmp.lt.s1    p7,p0 = FR_Floating_N, f0  // Test N negative
+(p6) br.cond.spnt  SCALB_NAN_INF_ZERO
+}
+;;
+
+//   Handle special cases if n = Nan, Inf, Zero
+{    .mfi
+     getf.sig      GR_N_as_int = FR_N_float_int // Get n from significand
+     fclass.m      p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+     mov           GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
+}
+{    .mfb
+     mov           GR_min_exp = 0x0fc01      // Exponent of minimum double
+     fcvt.xf       FR_N_float_int = FR_N_float_int // Convert N to FP integer
+(p9) br.cond.spnt  SCALB_NAN_INF_ZERO
+}
+;;
+
 {    .mmi
-     cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-     cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+     and           GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
+(p7) sub           GR_Big = r0, GR_Big          // Limit for N
     nop.i         0
-};;
+}
+;;

-//
-//   Is N really an int, only for those non-int indefinites?
-//   Create exp bias.     
-//
-{    .mfi
+{    .mib
+     cmp.lt        p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
+     cmp.ge        p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
+(p8) br.cond.spnt  SCALB_X_UNORM             // Branch if x=unorm
+}
+;;
+
+SCALB_COMMON2:
+// Main path continues.  Also return here from x=unorm path.
+//   Create biased exponent for 2**N
+{    .mmi
+(p6) mov           GR_N_as_int = GR_Big      // Limit N
+;;
     add           GR_N_Biased = GR_Bias,GR_N_as_int
-(p9) fcmp.neq.unc.s1 p7,p0  =   FR_Norm_N, FR_N_float_int
     nop.i         0
-};;
+}
+;;
+
+{    .mfi
+     setf.exp      FR_Two_N = GR_N_Biased               // Form 2**N
+(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int  // Test if N an integer
+     and           GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
+}
+;;

 //
-//   Branch and return if N is not an int.
-//   Main path, create 2**N
+//   Compute biased result exponent
+//   Branch if N is not an integer
+//
+{    .mib
+     add           GR_exp_Result = GR_exp_X, GR_N_as_int
+     mov           GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble
+(p9) br.cond.spnt  SCALB_N_NOT_INT
+}
+;;
+
+//
+//   Raise Denormal operand flag with compare
+//   Do final operation
 //
 {    .mfi
-     setf.exp      FR_Two_N = GR_N_Biased                   
-     nop.i                      999
+     cmp.lt        p7,p6 = GR_exp_Result, GR_max_exp  // Test no overflow
+     fcmp.ge.s0    p0,p11 = FR_Floating_X,FR_Floating_N  // Dummy to set denorm
+     cmp.lt        p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
 }
 {    .mfb
     nop.m         0
-(p7) frcpa.s0          f8,p11     =    f0,f0
-(p7) br.ret.spnt    b0          
-};;
-
-//
-//   Set denormal on denormal input x and denormal input N
-//
-{    .mfi
-     nop.m                      999
-(p10)fcmp.ge.s1    p6,p8 = FR_Norm_N,f0
-     nop.i 0
-};;
-{    .mfi
-     nop.m                      999
-     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
-     nop.i                      999
+     fma.d.s0      FR_Result = FR_Two_N,FR_Norm_X,f0
+(p9) br.cond.spnt  SCALB_UNDERFLOW           // Branch if certain underflow
 }
-{    .mfi
-     nop.m                      999
-     fcmp.ge.s0    p12,p13 = FR_Floating_N,f0
-     nop.i 0
-};;
+;;

-//
-//   Adjust 2**N if N was very small or very large
-//
+{    .mib
+(p6) cmp.gt.unc    p6,p8 = GR_exp_Result, GR_max_exp  // Test sure overflow
+(p7) cmp.ge.unc    p7,p9 = GR_exp_Result, GR_min_exp  // Test no over/underflow
+(p7) br.ret.sptk   b0                         // Return from main path
+}
+;;

-{    .mfi
-     nop.m 0
-(p6) fma.s1  FR_Two_N = FR_Big,f1,f0
-     nop.i 0
+{    .bbb
+(p6) br.cond.spnt  SCALB_OVERFLOW            // Branch if certain overflow
+(p8) br.cond.spnt  SCALB_POSSIBLE_OVERFLOW   // Branch if possible overflow
+(p9) br.cond.spnt  SCALB_POSSIBLE_UNDERFLOW  // Branch if possible underflow
 }
-{ .mlx
-     nop.m 999
-     movl GR_Scratch = 0x00000000000303FF 
-};;
-{    .mfi
-     nop.m 0
-(p8) fma.s1  FR_Two_N = FR_NBig,f1,f0
-     nop.i 0
-}
-{    .mlx
-     nop.m 999
-     movl GR_Scratch1= 0x00000000000103FF 
-};;
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01
+SCALB_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x103fe = exp_Result
+SCALB_POSSIBLE_OVERFLOW:

 //   Set up necessary status fields
 //
@ -254,137 +269,150 @@ GLOBAL_IEEE754_ENTRY(scalb)
 //   S3 user supplied status + FZ + TD   (Underflows)
 //
 {    .mfi
-     nop.m 999
+     mov           GR_pos_ov_limit = 0x103ff // Exponent for positive overflow
     fsetc.s3      0x7F,0x41
-     nop.i 999
+     nop.i         0
 }
 {    .mfi
-     nop.m 999
+     mov           GR_neg_ov_limit = 0x303ff // Exponent for negative overflow
     fsetc.s2      0x7F,0x42
-     nop.i 999
-};;
+     nop.i         0
+}
+;;

 //
-//   Do final operation
+//   Do final operation with s2 and s3
 //
 {    .mfi
-     setf.exp FR_NBig = GR_Scratch
-     fma.d.s0     FR_Result = FR_Two_N,FR_Norm_X,f0 
-     nop.i                           999
+     setf.exp      FR_NBig = GR_neg_ov_limit
+     fma.d.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0
+     nop.i         0
 }
 {    .mfi
-     nop.m                           999
-     fma.d.s3     FR_Result3 = FR_Two_N,FR_Norm_X,f0 
-     nop.i                           999
-};;
-{    .mfi
-     setf.exp FR_Big = GR_Scratch1
+     setf.exp      FR_Big = GR_pos_ov_limit
     fma.d.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0
-     nop.i                           999
-};;
+     nop.i         0
+}
+;;

 //   Check for overflow or underflow.
-//
-//   S0 user supplied status
-//   S2 user supplied status + WRE + TD  (Overflow)
-//   S3 user supplied status + FZ + TD   (Underflow)
-//
-//
 //   Restore s3
 //   Restore s2
 //
 {    .mfi
     nop.m         0
     fsetc.s3      0x7F,0x40
-     nop.i 999 
+     nop.i         0
 }
 {    .mfi
     nop.m         0
     fsetc.s2      0x7F,0x40
-     nop.i 999
-};;
+     nop.i         0
+}
+;;

 //
 //   Is the result zero?
 //
 {    .mfi
-     nop.m 999
-     fclass.m.unc   p6, p0 =  FR_Result3, 0x007
-     nop.i 999 
+     nop.m         0
+     fclass.m      p6, p0 =  FR_Result3, 0x007
+     nop.i         0
 }
 {    .mfi
-     addl GR_Tag = 53, r0
-     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.m         0
+     fcmp.ge.s1    p7, p8 = FR_Result2 , FR_Big
     nop.i         0
-};;
+}
+;;

 //
 //   Detect masked underflow - Tiny + Inexact Only
 //
 {    .mfi
-     nop.m 999
+     nop.m         0
 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
-     nop.i 999 
-};; 
+     nop.i         0
+}
+;;

 //
 //   Is result bigger the allowed range?
 //   Branch out for underflow
 //
 {    .mfb
-(p6) addl GR_Tag = 54, r0
+     nop.m          0
 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
 (p6) br.cond.spnt   SCALB_UNDERFLOW
-};;
+}
+;;

 //
 //   Branch out for overflow
 //
-{ .mbb
-     nop.m 0
+{ .bbb
 (p7) br.cond.spnt   SCALB_OVERFLOW
 (p9) br.cond.spnt   SCALB_OVERFLOW
-};;
-
-//
-//   Return from main path.
-//
-{    .mfb
-     nop.m 999
-     nop.f 0
-     br.ret.sptk     b0;;                   
+     br.ret.sptk    b0             //   Return from main path.
 }
+;;
+
+// Here if result overflows
+SCALB_OVERFLOW:
+{ .mib
+     alloc         r32=ar.pfs,3,0,4,0
+     addl          GR_Tag = 53, r0     // Set error tag for overflow
+     br.cond.sptk  __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+SCALB_UNDERFLOW:
+{ .mib
+     alloc         r32=ar.pfs,3,0,4,0
+     addl          GR_Tag = 54, r0     // Set error tag for underflow
+     br.cond.sptk  __libm_error_region // Call error support for underflow
+}
+;;

 SCALB_NAN_INF_ZERO:

 //
-//   Convert N to a fp integer
+//   Before entry, N has been converted to a fp integer in significand of 
+//     FR_N_float_int
+//
+//   Convert  N_float_int to floating point value
 //
+{    .mfi
+     getf.sig     GR_N_as_int = FR_N_float_int
+     fclass.m     p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+     nop.i        0
+}
 {    .mfi
     addl         GR_Scratch = 1,r0
-     fcvt.fx.trunc.s1  FR_N_float_int = FR_Norm_N 
-     nop.i 999
-}
-{    .mfi
-     nop.m 0
-     fclass.m.unc  p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan 
+     fcvt.xf      FR_N_float_int = FR_N_float_int
     nop.i        0
-};;
+}
+;;
+
 {    .mfi
     nop.m        0
-     fclass.m.unc  p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan 
+     fclass.m     p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
     shl          GR_Scratch = GR_Scratch,63
-};;
+}
+;;
+
 {    .mfi
     nop.m        0
-     fclass.m.unc  p8,p0 = FR_Floating_N, 0x21 // @inf
+     fclass.m     p8,p0 = FR_Floating_N, 0x21 // @inf
     nop.i        0
 }
 {    .mfi
     nop.m        0
-     fclass.m.unc  p9,p0 = FR_Floating_N, 0x22 // @-inf
+     fclass.m     p9,p0 = FR_Floating_N, 0x22 // @-inf
     nop.i        0
-};;
+}
+;;

 //
 //   Either X or N is a Nan, return result and possible raise invalid.
@ -393,12 +421,15 @@ SCALB_NAN_INF_ZERO:
     nop.m        0
 (p6) fma.d.s0     FR_Result = FR_Floating_N,FR_Floating_X,f0
 (p6) br.ret.spnt  b0
-};;
+}
+;;
+
 {    .mfb
-     getf.sig     GR_N_as_int = FR_N_float_int
+     nop.m        0
 (p7) fma.d.s0     FR_Result = FR_Floating_N,FR_Floating_X,f0
 (p7) br.ret.spnt  b0
-};;
+}
+;;

 //
 //   If N + Inf do something special
@ -413,43 +444,38 @@ SCALB_NAN_INF_ZERO:
     nop.m        0
 (p9) fnma.d.s0    FR_Floating_N = FR_Floating_N, f1, f0
     nop.i        0
-};;
+}
+;;

 //
 //   If N==-Inf,return x/(-N)
 //
 {    .mfb
-     nop.m 0
-(p9) frcpa.s0        FR_Result,p6 =  FR_Floating_X,FR_Floating_N
+     cmp.ne       p7,p0 = GR_N_as_int,GR_Scratch
+(p9) frcpa.s0     FR_Result,p0 = FR_Floating_X,FR_Floating_N
 (p9) br.ret.spnt  b0
-};;
-
-//
-//   Convert  N_float_int to floating point value
-//
-{     .mfi
-     cmp.ne.unc  p9,p0     =   GR_N_as_int,GR_Scratch
-     fcvt.xf  FR_N_float_int = FR_N_float_int
-     nop.i  0
-};;
+}
+;;

 //
 //   Is N an integer.
 //
 {    .mfi
     nop.m        0
-(p9) fcmp.neq.unc.s1 p7,p0  =   FR_Norm_N, FR_N_float_int
+(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
     nop.i        0
-};;
+}
+;;

 //
 //   If N not an int, return NaN and raise invalid.
 //
 {    .mfb
     nop.m        0
-(p7) frcpa.s0        FR_Result,p6     =    f0,f0
+(p7) frcpa.s0     FR_Result,p0 = f0,f0
 (p7) br.ret.spnt  b0
-};;
+}
+;;

 //
 //   Always return x in other path.
@ -458,13 +484,39 @@ SCALB_NAN_INF_ZERO:
     nop.m        0
     fma.d.s0     FR_Result = FR_Floating_X,f1,f0
     br.ret.sptk  b0
-};;
+}
+;;
+
+// Here if n not int
+// Return NaN and raise invalid.
+SCALB_N_NOT_INT:
+{    .mfb
+     nop.m        0
+     frcpa.s0     FR_Result,p0 = f0,f0
+     br.ret.sptk  b0
+}
+;;
+
+// Here if n=unorm
+SCALB_N_UNORM:
+{ .mfb
+     getf.exp      GR_signexp_N = FR_Norm_N // Get signexp of normalized n
+     fcvt.fx.trunc.s1   FR_N_float_int = FR_Norm_N // Get N in significand
+     br.cond.sptk  SCALB_COMMON1            // Return to main path
+}
+;;
+
+// Here if x=unorm
+SCALB_X_UNORM:
+{ .mib
+     getf.exp      GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+     nop.i         0
+     br.cond.sptk  SCALB_COMMON2            // Return to main path
+}
+;;

 GLOBAL_IEEE754_END(scalb)
-__libm_error_region:
-
-SCALB_OVERFLOW: 
-SCALB_UNDERFLOW: 
+LOCAL_LIBM_ENTRY(__libm_error_region)

 //
 // Get stack address of N
@ -517,9 +569,9 @@ SCALB_UNDERFLOW:
 //  Get location of result on stack
 //
 { .mmi
-   nop.m 0
-   nop.m 0
   add   GR_Parameter_RESULT = 48,sp
+   nop.m 0
+   nop.i 0
 };;

 //
--- a/sysdeps/ia64/fpu/e_scalbf.S
+++ b/sysdeps/ia64/fpu/e_scalbf.S
@ -43,6 +43,7 @@
 // 01/26/01 Scalb completely reworked and now standalone version
 // 05/20/02 Cleaned up namespace and sf0 syntax
 // 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/06/03 Improved performance
 //
 // API
 //==============================================================
@ -50,31 +51,52 @@
 // input  floating point f8 and floating point f9
 // output floating point f8
 //
+// int_type = 0 if int is 32 bits
+// int_type = 1 if int is 64 bits
+//
 // Returns x* 2**n using an fma and detects overflow
 // and underflow.
 //
 //
+// Strategy:
+//  Compute biased exponent of result exp_Result = N + exp_X
+//  Break into ranges:
+//   exp_Result > 0x1007e                 -> Certain overflow
+//   exp_Result = 0x1007e                 -> Possible overflow
+//   0x0ff81 <= exp_Result < 0x1007e      -> No over/underflow (main path)
+//   0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
+//   exp_Result < 0x0ff81 - 23            -> Certain underflow

+FR_Big         = f6
+FR_NBig        = f7
 FR_Floating_X  = f8
 FR_Result      = f8
 FR_Floating_N  = f9
 FR_Result2     = f9
-FR_Norm_N      = f10
-FR_Result3     = f11
-FR_Norm_X      = f12
+FR_Result3     = f10
+FR_Norm_X      = f11
+FR_Two_N       = f12
 FR_N_float_int = f13
-FR_Two_N       = f14
-FR_Two_to_Big  = f15
-FR_Big         = f6
-FR_NBig        = f7
+FR_Norm_N      = f14

+GR_neg_ov_limit= r14
+GR_big_exp     = r14
 GR_N_Biased    = r15
 GR_Big         = r16
-GR_NBig        = r17
-GR_Scratch     = r18
-GR_Scratch1    = r19
+GR_exp_Result  = r18
+GR_pos_ov_limit= r19
+GR_exp_sure_ou = r19
 GR_Bias        = r20
 GR_N_as_int    = r21
+GR_signexp_X   = r22
+GR_exp_X       = r23
+GR_exp_mask    = r24
+GR_max_exp     = r25
+GR_min_exp     = r26
+GR_min_den_exp = r27
+GR_Scratch     = r28
+GR_signexp_N   = r29
+GR_exp_N       = r30

 GR_SAVE_B0          = r32
 GR_SAVE_GP          = r33
@ -89,163 +111,156 @@ GLOBAL_IEEE754_ENTRY(scalbf)

 //
 //   Is x NAN, INF, ZERO, +-?
-//
-{    .mfi
-     alloc          r32=ar.pfs,0,3,4,0
-     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
-     addl  GR_Scratch  = 0x019C3F,r0 
-}
-//
-//   Is y a NAN, INF, ZERO, +-?
-//
-{    .mfi
-     nop.m 999
-     fclass.m.unc  p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf |  @zero
-     addl  GR_Scratch1  = 0x063BF,r0 
-}
-;;
-
-//
-//   Convert N to a fp integer
-//   Normalize x
-//
-{    .mfi
-     nop.m 0
-     fnorm.s1  FR_Norm_N  =   FR_Floating_N 
-     nop.i 999
-}
-{    .mfi
-     nop.m 999
-     fnorm.s1  FR_Norm_X  =   FR_Floating_X 
-     nop.i 999
-};;
-
-//
-//   Create 2*big
-//   Create 2**-big 
-//   Normalize x
-//   Branch on special values.
-//
-{ .mib
-     setf.exp      FR_Big = GR_Scratch                  
-     nop.i 0 
-(p6) br.cond.spnt  SCALBF_NAN_INF_ZERO 
-}
-{ .mib
-     setf.exp      FR_NBig = GR_Scratch1                  
-     nop.i 0 
-(p7) br.cond.spnt  SCALBF_NAN_INF_ZERO 
-};;
-
-//
-//   Convert N to a fp integer
-//   Create -35000
-//  
-{    .mfi
-     addl  GR_Scratch = 1,r0
-     fcvt.fx.trunc.s1   FR_N_float_int = FR_Norm_N 
-     addl    GR_NBig = -35000,r0
-}
-;;
-
-//
-//   Put N if a GP register
-//   Convert  N_float_int to floating point value
-//   Create 35000
 //   Build the exponent Bias
 //
-{    .mii
-     getf.sig     GR_N_as_int = FR_N_float_int
-     shl   GR_Scratch = GR_Scratch,63
-     addl  GR_Big = 35000,r0
+{    .mfi
+     getf.exp      GR_signexp_N = FR_Floating_N // Get signexp of n
+     fclass.m      p6,p0 = FR_Floating_X, 0xe7  // @snan | @qnan | @inf | @zero
+     mov           GR_Bias = 0x0ffff
 }
 {    .mfi
-     addl GR_Bias = 0x0FFFF,r0
-     fcvt.xf  FR_N_float_int = FR_N_float_int
+     mov           GR_Big = 35000      // If N this big then certain overflow
+     fcvt.fx.trunc.s1   FR_N_float_int = FR_Floating_N // Get N in significand
     nop.i         0
-};;
+}
+;;
+
+{    .mfi
+     getf.exp      GR_signexp_X = FR_Floating_X // Get signexp of x
+     fclass.m      p7,p0 = FR_Floating_N, 0x0b  // Test for n=unorm
+     nop.i         0
+}
+//
+//   Normalize n
+//
+{    .mfi
+     mov           GR_exp_mask = 0x1ffff     // Exponent mask
+     fnorm.s1      FR_Norm_N = FR_Floating_N
+     nop.i         0
+}
+;;

 //
-//   Catch those fp values that are beyond 2**64-1
-//   Is N > 35000     
-//   Is N < -35000     
+//   Is n NAN, INF, ZERO, +-?
 //
 {    .mfi
-     cmp.ne.unc  p9,p10 = GR_N_as_int,GR_Scratch
-     nop.f 0
-     nop.i 0
+     mov           GR_big_exp = 0x1003e      // Exponent at which n is integer
+     fclass.m      p9,p0 = FR_Floating_N, 0xe7  // @snan | @qnan | @inf | @zero
+     mov           GR_max_exp = 0x1007e      // Exponent of maximum float
 }
+//
+//   Normalize x
+//
+{ .mfb
+     nop.m         0
+     fnorm.s1      FR_Norm_X = FR_Floating_X
+(p7) br.cond.spnt  SCALBF_N_UNORM             // Branch if n=unorm
+}
+;;
+
+SCALBF_COMMON1:
+// Main path continues.  Also return here from n=unorm path.
+//   Handle special cases if x = Nan, Inf, Zero
+{ .mfb
+     nop.m         0
+     fcmp.lt.s1    p7,p0 = FR_Floating_N, f0  // Test N negative
+(p6) br.cond.spnt  SCALBF_NAN_INF_ZERO
+}
+;;
+
+//   Handle special cases if n = Nan, Inf, Zero
+{    .mfi
+     getf.sig      GR_N_as_int = FR_N_float_int // Get n from significand
+     fclass.m      p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+     mov           GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
+}
+{    .mfb
+     mov           GR_min_exp = 0x0ff81      // Exponent of minimum float
+     fcvt.xf       FR_N_float_int = FR_N_float_int // Convert N to FP integer
+(p9) br.cond.spnt  SCALBF_NAN_INF_ZERO
+}
+;;
+
 {    .mmi
-     cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-     cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+     and           GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
+(p7) sub           GR_Big = r0, GR_Big          // Limit for N
     nop.i         0
-};;
+}
+;;

-//
-//   Is N really an int, only for those non-int indefinites?
-//   Create exp bias.     
-//
-{    .mfi
+{    .mib
+     cmp.lt        p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
+     cmp.ge        p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
+(p8) br.cond.spnt  SCALBF_X_UNORM             // Branch if x=unorm
+}
+;;
+
+SCALBF_COMMON2:
+// Main path continues.  Also return here from x=unorm path.
+//   Create biased exponent for 2**N
+{    .mmi
+(p6) mov           GR_N_as_int = GR_Big      // Limit N
+;;
     add           GR_N_Biased = GR_Bias,GR_N_as_int
-(p9) fcmp.neq.unc.s1 p7,p0  =   FR_Norm_N, FR_N_float_int
     nop.i         0
-};;
+}
+;;
+
+{    .mfi
+     setf.exp      FR_Two_N = GR_N_Biased               // Form 2**N
+(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int  // Test if N an integer
+     and           GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
+}
+;;

 //
-//   Branch and return if N is not an int.
-//   Main path, create 2**N
+//   Compute biased result exponent
+//   Branch if N is not an integer
+//
+{    .mib
+     add           GR_exp_Result = GR_exp_X, GR_N_as_int
+     mov           GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
+(p9) br.cond.spnt  SCALBF_N_NOT_INT
+}
+;;
+
+//
+//   Raise Denormal operand flag with compare
+//   Do final operation
 //
 {    .mfi
-     setf.exp      FR_Two_N = GR_N_Biased                   
-     nop.i                      999
+     cmp.lt        p7,p6 = GR_exp_Result, GR_max_exp  // Test no overflow
+     fcmp.ge.s0    p0,p11 = FR_Floating_X,FR_Floating_N  // Dummy to set denorm
+     cmp.lt        p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
 }
 {    .mfb
     nop.m         0
-(p7) frcpa.s0          f8,p11     =    f0,f0
-(p7) br.ret.spnt    b0          
-};;
-
-//
-//   Set denormal on denormal input x and denormal input N
-//
-{    .mfi
-     nop.m                      999
-(p10)fcmp.ge.s1    p6,p8 = FR_Norm_N,f0
-     nop.i 0
-};;
-{    .mfi
-     nop.m                      999
-     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
-     nop.i                      999
+     fma.s.s0      FR_Result = FR_Two_N,FR_Norm_X,f0
+(p9) br.cond.spnt  SCALBF_UNDERFLOW           // Branch if certain underflow
 }
-{    .mfi
-     nop.m                      999
-     fcmp.ge.s0    p12,p13 = FR_Floating_N,f0
-     nop.i 0
-};;
+;;

-//
-//   Adjust 2**N if N was very small or very large
-//
+{    .mib
+(p6) cmp.gt.unc    p6,p8 = GR_exp_Result, GR_max_exp  // Test sure overflow
+(p7) cmp.ge.unc    p7,p9 = GR_exp_Result, GR_min_exp  // Test no over/underflow
+(p7) br.ret.sptk   b0                         // Return from main path
+}
+;;

-{    .mfi
-     nop.m 0
-(p6) fma.s1  FR_Two_N = FR_Big,f1,f0
-     nop.i 0
+{    .bbb
+(p6) br.cond.spnt  SCALBF_OVERFLOW            // Branch if certain overflow
+(p8) br.cond.spnt  SCALBF_POSSIBLE_OVERFLOW   // Branch if possible overflow
+(p9) br.cond.spnt  SCALBF_POSSIBLE_UNDERFLOW  // Branch if possible underflow
 }
-{ .mlx
-     nop.m 999
-     movl GR_Scratch = 0x000000000003007F 
-};;
-{    .mfi
-     nop.m 0
-(p8) fma.s1  FR_Two_N = FR_NBig,f1,f0
-     nop.i 0
-}
-{    .mlx
-     nop.m 999
-     movl GR_Scratch1= 0x000000000001007F 
-};;
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
+SCALBF_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x1007e = exp_Result
+SCALBF_POSSIBLE_OVERFLOW:

 //   Set up necessary status fields
 //
@ -254,137 +269,150 @@ GLOBAL_IEEE754_ENTRY(scalbf)
 //   S3 user supplied status + FZ + TD   (Underflows)
 //
 {    .mfi
-     nop.m 999
+     mov           GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
     fsetc.s3      0x7F,0x41
-     nop.i 999
+     nop.i         0
 }
 {    .mfi
-     nop.m 999
+     mov           GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
     fsetc.s2      0x7F,0x42
-     nop.i 999
-};;
+     nop.i         0
+}
+;;

 //
-//   Do final operation
+//   Do final operation with s2 and s3
 //
 {    .mfi
-     setf.exp FR_NBig = GR_Scratch
-     fma.s.s0     FR_Result = FR_Two_N,FR_Norm_X,f0 
-     nop.i                           999
+     setf.exp      FR_NBig = GR_neg_ov_limit
+     fma.s.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0
+     nop.i         0
 }
 {    .mfi
-     nop.m                           999
-     fma.s.s3     FR_Result3 = FR_Two_N,FR_Norm_X,f0 
-     nop.i                           999
-};;
-{    .mfi
-     setf.exp FR_Big = GR_Scratch1
+     setf.exp      FR_Big = GR_pos_ov_limit
     fma.s.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0
-     nop.i                           999
-};;
+     nop.i         0
+}
+;;

 //   Check for overflow or underflow.
-//
-//   S0 user supplied status
-//   S2 user supplied status + WRE + TD  (Overflow)
-//   S3 user supplied status + FZ + TD   (Underflow)
-//
-//
 //   Restore s3
 //   Restore s2
 //
 {    .mfi
     nop.m         0
     fsetc.s3      0x7F,0x40
-     nop.i 999 
+     nop.i         0
 }
 {    .mfi
     nop.m         0
     fsetc.s2      0x7F,0x40
-     nop.i 999
-};;
+     nop.i         0
+}
+;;

 //
 //   Is the result zero?
 //
 {    .mfi
-     nop.m 999
-     fclass.m.unc   p6, p0 =  FR_Result3, 0x007
-     nop.i 999 
+     nop.m         0
+     fclass.m      p6, p0 =  FR_Result3, 0x007
+     nop.i         0
 }
 {    .mfi
-     addl GR_Tag = 55, r0
-     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.m         0
+     fcmp.ge.s1    p7, p8 = FR_Result2 , FR_Big
     nop.i         0
-};;
+}
+;;

 //
 //   Detect masked underflow - Tiny + Inexact Only
 //
 {    .mfi
-     nop.m 999
+     nop.m         0
 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
-     nop.i 999 
-};; 
+     nop.i         0
+}
+;;

 //
 //   Is result bigger the allowed range?
 //   Branch out for underflow
 //
 {    .mfb
-(p6) addl GR_Tag = 56, r0
+     nop.m          0
 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
 (p6) br.cond.spnt   SCALBF_UNDERFLOW
-};;
+}
+;;

 //
 //   Branch out for overflow
 //
-{ .mbb
-     nop.m 0
+{ .bbb
 (p7) br.cond.spnt   SCALBF_OVERFLOW
 (p9) br.cond.spnt   SCALBF_OVERFLOW
-};;
-
-//
-//   Return from main path.
-//
-{    .mfb
-     nop.m 999
-     nop.f 0
-     br.ret.sptk     b0;;                   
+     br.ret.sptk    b0             //   Return from main path.
 }
+;;
+
+// Here if result overflows
+SCALBF_OVERFLOW:
+{ .mib
+     alloc         r32=ar.pfs,3,0,4,0
+     addl          GR_Tag = 55, r0     // Set error tag for overflow
+     br.cond.sptk  __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+SCALBF_UNDERFLOW:
+{ .mib
+     alloc         r32=ar.pfs,3,0,4,0
+     addl          GR_Tag = 56, r0     // Set error tag for underflow
+     br.cond.sptk  __libm_error_region // Call error support for underflow
+}
+;;

 SCALBF_NAN_INF_ZERO:

 //
-//   Convert N to a fp integer
+//   Before entry, N has been converted to a fp integer in significand of 
+//     FR_N_float_int
+//
+//   Convert  N_float_int to floating point value
 //
+{    .mfi
+     getf.sig     GR_N_as_int = FR_N_float_int
+     fclass.m     p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+     nop.i        0
+}
 {    .mfi
     addl         GR_Scratch = 1,r0
-     fcvt.fx.trunc.s1  FR_N_float_int = FR_Norm_N 
-     nop.i 999
-}
-{    .mfi
-     nop.m 0
-     fclass.m.unc  p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan 
+     fcvt.xf      FR_N_float_int = FR_N_float_int
     nop.i        0
-};;
+}
+;;
+
 {    .mfi
     nop.m        0
-     fclass.m.unc  p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan 
+     fclass.m     p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
     shl          GR_Scratch = GR_Scratch,63
-};;
+}
+;;
+
 {    .mfi
     nop.m        0
-     fclass.m.unc  p8,p0 = FR_Floating_N, 0x21 // @inf
+     fclass.m     p8,p0 = FR_Floating_N, 0x21 // @inf
     nop.i        0
 }
 {    .mfi
     nop.m        0
-     fclass.m.unc  p9,p0 = FR_Floating_N, 0x22 // @-inf
+     fclass.m     p9,p0 = FR_Floating_N, 0x22 // @-inf
     nop.i        0
-};;
+}
+;;

 //
 //   Either X or N is a Nan, return result and possible raise invalid.
@ -393,12 +421,15 @@ SCALBF_NAN_INF_ZERO:
     nop.m        0
 (p6) fma.s.s0     FR_Result = FR_Floating_N,FR_Floating_X,f0
 (p6) br.ret.spnt  b0
-};;
+}
+;;
+
 {    .mfb
-     getf.sig     GR_N_as_int = FR_N_float_int
+     nop.m        0
 (p7) fma.s.s0     FR_Result = FR_Floating_N,FR_Floating_X,f0
 (p7) br.ret.spnt  b0
-};;
+}
+;;

 //
 //   If N + Inf do something special
@ -413,43 +444,38 @@ SCALBF_NAN_INF_ZERO:
     nop.m        0
 (p9) fnma.s.s0    FR_Floating_N = FR_Floating_N, f1, f0
     nop.i        0
-};;
+}
+;;

 //
 //   If N==-Inf,return x/(-N)
 //
 {    .mfb
-     nop.m 0
-(p9) frcpa.s0        FR_Result,p6 =  FR_Floating_X,FR_Floating_N
+     cmp.ne       p7,p0 = GR_N_as_int,GR_Scratch
+(p9) frcpa.s0     FR_Result,p0 = FR_Floating_X,FR_Floating_N
 (p9) br.ret.spnt  b0
-};;
-
-//
-//   Convert  N_float_int to floating point value
-//
-{     .mfi
-     cmp.ne.unc  p9,p0     =   GR_N_as_int,GR_Scratch
-     fcvt.xf  FR_N_float_int = FR_N_float_int
-     nop.i  0
-};;
+}
+;;

 //
 //   Is N an integer.
 //
 {    .mfi
     nop.m        0
-(p9) fcmp.neq.unc.s1 p7,p0  =   FR_Norm_N, FR_N_float_int
+(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
     nop.i        0
-};;
+}
+;;

 //
 //   If N not an int, return NaN and raise invalid.
 //
 {    .mfb
     nop.m        0
-(p7) frcpa.s0        FR_Result,p6     =    f0,f0
+(p7) frcpa.s0     FR_Result,p0 = f0,f0
 (p7) br.ret.spnt  b0
-};;
+}
+;;

 //
 //   Always return x in other path.
@ -458,13 +484,39 @@ SCALBF_NAN_INF_ZERO:
     nop.m        0
     fma.s.s0     FR_Result = FR_Floating_X,f1,f0
     br.ret.sptk  b0
-};;
+}
+;;
+
+// Here if n is not an integer (finite, non-integral n)
+// Return NaN and raise invalid, then return directly to the caller.
+SCALBF_N_NOT_INT:
+{    .mfb
+     nop.m        0
+     frcpa.s0     FR_Result,p0 = f0,f0   // 0/0 gives the NaN result; predicate output discarded (p0)
+     br.ret.sptk  b0                     // Unconditional return
+}
+;;
+
+// Here if n=unorm: redo the signexp and integer extraction from normalized n, then rejoin
+SCALBF_N_UNORM:
+{ .mfb
+     getf.exp      GR_signexp_N = FR_Norm_N // Get signexp of normalized n
+     fcvt.fx.trunc.s1   FR_N_float_int = FR_Norm_N // Get N in significand
+     br.cond.sptk  SCALBF_COMMON1            // Return to main path
+}
+;;
+
+// Here if x=unorm: refetch the signexp from normalized x, then rejoin the main path
+SCALBF_X_UNORM:
+{ .mib
+     getf.exp      GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+     nop.i         0
+     br.cond.sptk  SCALBF_COMMON2            // Return to main path
+}
+;;

 GLOBAL_IEEE754_END(scalbf)
-__libm_error_region:
-
-SCALBF_OVERFLOW: 
-SCALBF_UNDERFLOW: 
+LOCAL_LIBM_ENTRY(__libm_error_region)

 //
 // Get stack address of N
@ -517,9 +569,9 @@ SCALBF_UNDERFLOW:
 //  Get location of result on stack
 //
 { .mmi
-   nop.m 0
-   nop.m 0
   add   GR_Parameter_RESULT = 48,sp
+   nop.m 0
+   nop.i 0
 };;

 //
--- a/sysdeps/ia64/fpu/e_scalbl.S
+++ b/sysdeps/ia64/fpu/e_scalbl.S
@ -43,38 +43,60 @@
 // 01/26/01 Scalb completely reworked and now standalone version
 // 05/20/02 Cleaned up namespace and sf0 syntax
 // 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/06/03 Improved performance
 //
 // API
 //==============================================================
-// double-extended = scalbl  (double-extended x, double-extended n) 
+// long double = scalbl  (long double x, long double n)
 // input  floating point f8 and floating point f9
 // output floating point f8
 //
+// int_type = 0 if int is 32 bits
+// int_type = 1 if int is 64 bits
+//
 // Returns x* 2**n using an fma and detects overflow
 // and underflow.
 //
 //
+// Strategy:
+//  Compute biased exponent of result exp_Result = N + exp_X
+//  Break into ranges:
+//   exp_Result > 0x13ffe                 -> Certain overflow
+//   exp_Result = 0x13ffe                 -> Possible overflow
+//   0x0c001 <= exp_Result < 0x13ffe      -> No over/underflow (main path)
+//   0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow
+//   exp_Result < 0x0c001 - 63            -> Certain underflow

+FR_Big         = f6
+FR_NBig        = f7
 FR_Floating_X  = f8
 FR_Result      = f8
 FR_Floating_N  = f9
 FR_Result2     = f9
-FR_Norm_N      = f10
-FR_Result3     = f11
-FR_Norm_X      = f12
+FR_Result3     = f10
+FR_Norm_X      = f11
+FR_Two_N       = f12
 FR_N_float_int = f13
-FR_Two_N       = f14
-FR_Two_to_Big  = f15
-FR_Big         = f6
-FR_NBig        = f7
+FR_Norm_N      = f14

+GR_neg_ov_limit= r14
+GR_big_exp     = r14
 GR_N_Biased    = r15
 GR_Big         = r16
-GR_NBig        = r17
-GR_Scratch     = r18
-GR_Scratch1    = r19
+GR_exp_Result  = r18
+GR_pos_ov_limit= r19
+GR_exp_sure_ou = r19
 GR_Bias        = r20
 GR_N_as_int    = r21
+GR_signexp_X   = r22
+GR_exp_X       = r23
+GR_exp_mask    = r24
+GR_max_exp     = r25
+GR_min_exp     = r26
+GR_min_den_exp = r27
+GR_Scratch     = r28
+GR_signexp_N   = r29
+GR_exp_N       = r30

 GR_SAVE_B0          = r32
 GR_SAVE_GP          = r33
@ -89,163 +111,156 @@ GLOBAL_IEEE754_ENTRY(scalbl)

 //
 //   Is x NAN, INF, ZERO, +-?
-//
-{    .mfi
-     alloc          r32=ar.pfs,0,3,4,0
-     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
-     addl  GR_Scratch  = 0x019C3F,r0 
-}
-//
-//   Is y a NAN, INF, ZERO, +-?
-//
-{    .mfi
-     nop.m 999
-     fclass.m.unc  p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf |  @zero
-     addl  GR_Scratch1  = 0x063BF,r0 
-}
-;;
-
-//
-//   Convert N to a fp integer
-//   Normalize x
-//
-{    .mfi
-     nop.m 0
-     fnorm.s1  FR_Norm_N  =   FR_Floating_N 
-     nop.i 999
-}
-{    .mfi
-     nop.m 999
-     fnorm.s1  FR_Norm_X  =   FR_Floating_X 
-     nop.i 999
-};;
-
-//
-//   Create 2*big
-//   Create 2**-big 
-//   Normalize x
-//   Branch on special values.
-//
-{ .mib
-     setf.exp      FR_Big = GR_Scratch                  
-     nop.i 0 
-(p6) br.cond.spnt  SCALBL_NAN_INF_ZERO 
-}
-{ .mib
-     setf.exp      FR_NBig = GR_Scratch1                  
-     nop.i 0 
-(p7) br.cond.spnt  SCALBL_NAN_INF_ZERO 
-};;
-
-//
-//   Convert N to a fp integer
-//   Create -35000
-//  
-{    .mfi
-     addl  GR_Scratch = 1,r0
-     fcvt.fx.trunc.s1   FR_N_float_int = FR_Norm_N 
-     addl    GR_NBig = -35000,r0
-}
-;;
-
-//
-//   Put N if a GP register
-//   Convert  N_float_int to floating point value
-//   Create 35000
 //   Build the exponent Bias
 //
-{    .mii
-     getf.sig     GR_N_as_int = FR_N_float_int
-     shl   GR_Scratch = GR_Scratch,63
-     addl  GR_Big = 35000,r0
+{    .mfi
+     getf.exp      GR_signexp_N = FR_Floating_N // Get signexp of n
+     fclass.m      p6,p0 = FR_Floating_X, 0xe7  // @snan | @qnan | @inf | @zero
+     mov           GR_Bias = 0x0ffff
 }
 {    .mfi
-     addl GR_Bias = 0x0FFFF,r0
-     fcvt.xf  FR_N_float_int = FR_N_float_int
+     mov           GR_Big = 35000      // If N this big then certain overflow
+     fcvt.fx.trunc.s1   FR_N_float_int = FR_Floating_N // Get N in significand
     nop.i         0
-};;
+}
+;;
+
+{    .mfi
+     getf.exp      GR_signexp_X = FR_Floating_X // Get signexp of x
+     fclass.m      p7,p0 = FR_Floating_N, 0x0b  // Test for n=unorm
+     nop.i         0
+}
+//
+//   Normalize n
+//
+{    .mfi
+     mov           GR_exp_mask = 0x1ffff     // Exponent mask
+     fnorm.s1      FR_Norm_N = FR_Floating_N
+     nop.i         0
+}
+;;

 //
-//   Catch those fp values that are beyond 2**64-1
-//   Is N > 35000     
-//   Is N < -35000     
+//   Is n NAN, INF, ZERO, +-?
 //
 {    .mfi
-     cmp.ne.unc  p9,p10 = GR_N_as_int,GR_Scratch
-     nop.f 0
-     nop.i 0
+     mov           GR_big_exp = 0x1003e      // Exponent at which n is integer
+     fclass.m      p9,p0 = FR_Floating_N, 0xe7  // @snan | @qnan | @inf | @zero
+     mov           GR_max_exp = 0x13ffe      // Exponent of maximum long double
 }
+//
+//   Normalize x
+//
+{ .mfb
+     nop.m         0
+     fnorm.s1      FR_Norm_X = FR_Floating_X
+(p7) br.cond.spnt  SCALBL_N_UNORM             // Branch if n=unorm
+}
+;;
+
+SCALBL_COMMON1:
+// Main path continues.  Also return here from n=unorm path (SCALBL_N_UNORM).
+//   Handle special cases if x = Nan, Inf, Zero
+{ .mfb
+     nop.m         0
+     fcmp.lt.s1    p7,p0 = FR_Floating_N, f0  // Test N negative
+(p6) br.cond.spnt  SCALBL_NAN_INF_ZERO        // p6 set earlier by fclass.m on x with mask 0xe7
+}
+;;
+
+//   Handle special cases if n = Nan, Inf, Zero
+{    .mfi
+     getf.sig      GR_N_as_int = FR_N_float_int // Get n from significand
+     fclass.m      p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+     mov           GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
+}
+{    .mfb
+     mov           GR_min_exp = 0x0c001      // Exponent of minimum long double
+     fcvt.xf       FR_N_float_int = FR_N_float_int // Convert N to FP integer
+(p9) br.cond.spnt  SCALBL_NAN_INF_ZERO        // p9 set earlier by fclass.m on n with mask 0xe7
+}
+;;
+
 {    .mmi
-     cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-     cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+     and           GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
+(p7) sub           GR_Big = r0, GR_Big          // Limit for N
     nop.i         0
-};;
+}
+;;

-//
-//   Is N really an int, only for those non-int indefinites?
-//   Create exp bias.     
-//
-{    .mfi
+{    .mib
+     cmp.lt        p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
+     cmp.ge        p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
+(p8) br.cond.spnt  SCALBL_X_UNORM             // Branch if x=unorm
+}
+;;
+
+SCALBL_COMMON2:
+// Main path continues.  Also return here from x=unorm path.
+//   Create biased exponent for 2**N
+{    .mmi
+(p6) mov           GR_N_as_int = GR_Big      // Limit N
+;;
     add           GR_N_Biased = GR_Bias,GR_N_as_int
-(p9) fcmp.neq.unc.s1 p7,p0  =   FR_Norm_N, FR_N_float_int
     nop.i         0
-};;
+}
+;;
+
+{    .mfi
+     setf.exp      FR_Two_N = GR_N_Biased               // Form 2**N
+(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int  // Test if N an integer
+     and           GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
+}
+;;

 //
-//   Branch and return if N is not an int.
-//   Main path, create 2**N
+//   Compute biased result exponent
+//   Branch if N is not an integer
+//
+{    .mib
+     add           GR_exp_Result = GR_exp_X, GR_N_as_int
+     mov           GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble
+(p9) br.cond.spnt  SCALBL_N_NOT_INT
+}
+;;
+
+//
+//   Raise Denormal operand flag with compare
+//   Do final operation
 //
 {    .mfi
-     setf.exp      FR_Two_N = GR_N_Biased                   
-     nop.i                      999
+     cmp.lt        p7,p6 = GR_exp_Result, GR_max_exp  // Test no overflow
+     fcmp.ge.s0    p0,p11 = FR_Floating_X,FR_Floating_N  // Dummy to set denorm
+     cmp.lt        p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
 }
 {    .mfb
     nop.m         0
-(p7) frcpa.s0          f8,p11     =    f0,f0
-(p7) br.ret.spnt    b0          
-};;
-
-//
-//   Set denormal on denormal input x and denormal input N
-//
-{    .mfi
-     nop.m                      999
-(p10)fcmp.ge.s1    p6,p8 = FR_Norm_N,f0
-     nop.i 0
-};;
-{    .mfi
-     nop.m                      999
-     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
-     nop.i                      999
+     fma.s0        FR_Result = FR_Two_N,FR_Norm_X,f0
+(p9) br.cond.spnt  SCALBL_UNDERFLOW           // Branch if certain underflow
 }
-{    .mfi
-     nop.m                      999
-     fcmp.ge.s0    p12,p13 = FR_Floating_N,f0
-     nop.i 0
-};;
+;;

-//
-//   Adjust 2**N if N was very small or very large
-//
+{    .mib
+(p6) cmp.gt.unc    p6,p8 = GR_exp_Result, GR_max_exp  // Test sure overflow
+(p7) cmp.ge.unc    p7,p9 = GR_exp_Result, GR_min_exp  // Test no over/underflow
+(p7) br.ret.sptk   b0                         // Return from main path
+}
+;;

-{    .mfi
-     nop.m 0
-(p6) fma.s1  FR_Two_N = FR_Big,f1,f0
-     nop.i 0
+{    .bbb
+(p6) br.cond.spnt  SCALBL_OVERFLOW            // Branch if certain overflow
+(p8) br.cond.spnt  SCALBL_POSSIBLE_OVERFLOW   // Branch if possible overflow
+(p9) br.cond.spnt  SCALBL_POSSIBLE_UNDERFLOW  // Branch if possible underflow
 }
-{ .mlx
-     nop.m 999
-     movl GR_Scratch = 0x0000000000033FFF 
-};;
-{    .mfi
-     nop.m 0
-(p8) fma.s1  FR_Two_N = FR_NBig,f1,f0
-     nop.i 0
-}
-{    .mlx
-     nop.m 999
-     movl GR_Scratch1= 0x0000000000013FFF 
-};;
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001
+SCALBL_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x13ffe = exp_Result
+SCALBL_POSSIBLE_OVERFLOW:

 //   Set up necessary status fields
 //
@ -254,137 +269,150 @@ GLOBAL_IEEE754_ENTRY(scalbl)
 //   S3 user supplied status + FZ + TD   (Underflows)
 //
 {    .mfi
-     nop.m 999
+     mov           GR_pos_ov_limit = 0x13fff // Exponent for positive overflow
     fsetc.s3      0x7F,0x41
-     nop.i 999
+     nop.i         0
 }
 {    .mfi
-     nop.m 999
+     mov           GR_neg_ov_limit = 0x33fff // Exponent for negative overflow
     fsetc.s2      0x7F,0x42
-     nop.i 999
-};;
+     nop.i         0
+}
+;;

 //
-//   Do final operation
+//   Do final operation with s2 and s3
 //
 {    .mfi
-     setf.exp FR_NBig = GR_Scratch
-     fma.s0     FR_Result = FR_Two_N,FR_Norm_X,f0 
-     nop.i                           999
+     setf.exp      FR_NBig = GR_neg_ov_limit
+     fma.s3        FR_Result3 = FR_Two_N,FR_Norm_X,f0
+     nop.i         0
 }
 {    .mfi
-     nop.m                           999
-     fma.s3     FR_Result3 = FR_Two_N,FR_Norm_X,f0 
-     nop.i                           999
-};;
-{    .mfi
-     setf.exp FR_Big = GR_Scratch1
+     setf.exp      FR_Big = GR_pos_ov_limit
     fma.s2        FR_Result2 = FR_Two_N,FR_Norm_X,f0
-     nop.i                           999
-};;
+     nop.i         0
+}
+;;

 //   Check for overflow or underflow.
-//
-//   S0 user supplied status
-//   S2 user supplied status + WRE + TD  (Overflow)
-//   S3 user supplied status + FZ + TD   (Underflow)
-//
-//
 //   Restore s3
 //   Restore s2
 //
 {    .mfi
     nop.m         0
     fsetc.s3      0x7F,0x40
-     nop.i 999 
+     nop.i         0
 }
 {    .mfi
     nop.m         0
     fsetc.s2      0x7F,0x40
-     nop.i 999
-};;
+     nop.i         0
+}
+;;

 //
 //   Is the result zero?
 //
 {    .mfi
-     nop.m 999
-     fclass.m.unc   p6, p0 =  FR_Result3, 0x007
-     nop.i 999 
+     nop.m         0
+     fclass.m      p6, p0 =  FR_Result3, 0x007
+     nop.i         0
 }
 {    .mfi
-     addl GR_Tag = 51, r0
-     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.m         0
+     fcmp.ge.s1    p7, p8 = FR_Result2 , FR_Big
     nop.i         0
-};;
+}
+;;

 //
 //   Detect masked underflow - Tiny + Inexact Only
 //
 {    .mfi
-     nop.m 999
+     nop.m         0
 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
-     nop.i 999 
-};; 
+     nop.i         0
+}
+;;

 //
 //   Is result bigger the allowed range?
 //   Branch out for underflow
 //
 {    .mfb
-(p6) addl GR_Tag = 52, r0
+     nop.m          0
 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
 (p6) br.cond.spnt   SCALBL_UNDERFLOW
-};;
+}
+;;

 //
 //   Branch out for overflow
 //
-{ .mbb
-     nop.m 0
+{ .bbb
 (p7) br.cond.spnt   SCALBL_OVERFLOW
 (p9) br.cond.spnt   SCALBL_OVERFLOW
-};;
-
-//
-//   Return from main path.
-//
-{    .mfb
-     nop.m 999
-     nop.f 0
-     br.ret.sptk     b0;;                   
+     br.ret.sptk    b0             //   Return from main path.
 }
+;;
+
+// Here if result overflows: set the overflow tag, then go to __libm_error_region
+SCALBL_OVERFLOW:
+{ .mib
+     alloc         r32=ar.pfs,3,0,4,0  // New frame; sizes after ar.pfs are i,l,o,r
+     addl          GR_Tag = 51, r0     // Set error tag for overflow
+     br.cond.sptk  __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows: set the underflow tag, then go to __libm_error_region
+SCALBL_UNDERFLOW:
+{ .mib
+     alloc         r32=ar.pfs,3,0,4,0  // New frame; sizes after ar.pfs are i,l,o,r
+     addl          GR_Tag = 52, r0     // Set error tag for underflow
+     br.cond.sptk  __libm_error_region // Call error support for underflow
+}
+;;

 SCALBL_NAN_INF_ZERO:

 //
-//   Convert N to a fp integer
+//   Before entry, N has been converted to a fp integer in significand of 
+//     FR_N_float_int
+//
+//   Convert  N_float_int to floating point value
 //
+{    .mfi
+     getf.sig     GR_N_as_int = FR_N_float_int
+     fclass.m     p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+     nop.i        0
+}
 {    .mfi
     addl         GR_Scratch = 1,r0
-     fcvt.fx.trunc.s1  FR_N_float_int = FR_Norm_N 
-     nop.i 999
-}
-{    .mfi
-     nop.m 0
-     fclass.m.unc  p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan 
+     fcvt.xf      FR_N_float_int = FR_N_float_int
     nop.i        0
-};;
+}
+;;
+
 {    .mfi
     nop.m        0
-     fclass.m.unc  p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan 
+     fclass.m     p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
     shl          GR_Scratch = GR_Scratch,63
-};;
+}
+;;
+
 {    .mfi
     nop.m        0
-     fclass.m.unc  p8,p0 = FR_Floating_N, 0x21 // @inf
+     fclass.m     p8,p0 = FR_Floating_N, 0x21 // @inf
     nop.i        0
 }
 {    .mfi
     nop.m        0
-     fclass.m.unc  p9,p0 = FR_Floating_N, 0x22 // @-inf
+     fclass.m     p9,p0 = FR_Floating_N, 0x22 // @-inf
     nop.i        0
-};;
+}
+;;

 //
 //   Either X or N is a Nan, return result and possible raise invalid.
@ -393,12 +421,15 @@ SCALBL_NAN_INF_ZERO:
     nop.m        0
 (p6) fma.s0       FR_Result = FR_Floating_N,FR_Floating_X,f0
 (p6) br.ret.spnt  b0
-};;
+}
+;;
+
 {    .mfb
-     getf.sig     GR_N_as_int = FR_N_float_int
+     nop.m        0
 (p7) fma.s0       FR_Result = FR_Floating_N,FR_Floating_X,f0
 (p7) br.ret.spnt  b0
-};;
+}
+;;

 //
 //   If N + Inf do something special
@ -413,43 +444,38 @@ SCALBL_NAN_INF_ZERO:
     nop.m        0
 (p9) fnma.s0      FR_Floating_N = FR_Floating_N, f1, f0
     nop.i        0
-};;
+}
+;;

 //
 //   If N==-Inf,return x/(-N)
 //
 {    .mfb
-     nop.m 0
-(p9) frcpa.s0        FR_Result,p6 =  FR_Floating_X,FR_Floating_N
+     cmp.ne       p7,p0 = GR_N_as_int,GR_Scratch
+(p9) frcpa.s0     FR_Result,p0 = FR_Floating_X,FR_Floating_N
 (p9) br.ret.spnt  b0
-};;
-
-//
-//   Convert  N_float_int to floating point value
-//
-{     .mfi
-     cmp.ne.unc  p9,p0     =   GR_N_as_int,GR_Scratch
-     fcvt.xf  FR_N_float_int = FR_N_float_int
-     nop.i  0
-};;
+}
+;;

 //
 //   Is N an integer.
 //
 {    .mfi
     nop.m        0
-(p9) fcmp.neq.unc.s1 p7,p0  =   FR_Norm_N, FR_N_float_int
+(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
     nop.i        0
-};;
+}
+;;

 //
 //   If N not an int, return NaN and raise invalid.
 //
 {    .mfb
     nop.m        0
-(p7) frcpa.s0        FR_Result,p6     =    f0,f0
+(p7) frcpa.s0     FR_Result,p0 = f0,f0
 (p7) br.ret.spnt  b0
-};;
+}
+;;

 //
 //   Always return x in other path.
@ -458,13 +484,39 @@ SCALBL_NAN_INF_ZERO:
     nop.m        0
     fma.s0       FR_Result = FR_Floating_X,f1,f0
     br.ret.sptk  b0
-};;
+}
+;;
+
+// Here if n is not an integer (reached from the main path via the p9 branch)
+// Return NaN and raise invalid, then return directly to the caller.
+SCALBL_N_NOT_INT:
+{    .mfb
+     nop.m        0
+     frcpa.s0     FR_Result,p0 = f0,f0   // 0/0 gives the NaN result; predicate output discarded (p0)
+     br.ret.sptk  b0                     // Unconditional return
+}
+;;
+
+// Here if n=unorm: redo the signexp and integer extraction from normalized n, then rejoin
+SCALBL_N_UNORM:
+{ .mfb
+     getf.exp      GR_signexp_N = FR_Norm_N // Get signexp of normalized n
+     fcvt.fx.trunc.s1   FR_N_float_int = FR_Norm_N // Get N in significand
+     br.cond.sptk  SCALBL_COMMON1            // Return to main path
+}
+;;
+
+// Here if x=unorm: refetch the signexp from normalized x, then rejoin the main path
+SCALBL_X_UNORM:
+{ .mib
+     getf.exp      GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+     nop.i         0
+     br.cond.sptk  SCALBL_COMMON2            // Return to main path
+}
+;;

 GLOBAL_IEEE754_END(scalbl)
-__libm_error_region:
-
-SCALBL_OVERFLOW: 
-SCALBL_UNDERFLOW: 
+LOCAL_LIBM_ENTRY(__libm_error_region)

 //
 // Get stack address of N
@ -517,9 +569,9 @@ SCALBL_UNDERFLOW:
 //  Get location of result on stack
 //
 { .mmi
-   nop.m 0
-   nop.m 0
   add   GR_Parameter_RESULT = 48,sp
+   nop.m 0
+   nop.i 0
 };;

 //
--- a/sysdeps/ia64/fpu/e_sinh.S
+++ b/sysdeps/ia64/fpu/e_sinh.S
@ -850,6 +850,7 @@ SINH_UNORM:

 GLOBAL_IEEE754_END(sinh)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_sinhf.S
+++ b/sysdeps/ia64/fpu/e_sinhf.S
@ -689,6 +689,7 @@ SINH_UNORM:

 GLOBAL_IEEE754_END(sinhf)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/e_sinhl.S
+++ b/sysdeps/ia64/fpu/e_sinhl.S
@ -1055,6 +1055,7 @@ SINH_HUGE:

 GLOBAL_IEEE754_END(sinhl)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue

--- a/sysdeps/ia64/fpu/e_sqrt.S
+++ b/sysdeps/ia64/fpu/e_sqrt.S
@ -252,6 +252,7 @@ GLOBAL_IEEE754_ENTRY(sqrt)
 }
 // END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
 GLOBAL_IEEE754_END(sqrt)
+
 // Stack operations when calling error support.
 //       (1)               (2)                          (3) (call)              (4)
 //   sp   -> +          psp -> +                     psp -> +                   sp -> +
--- a/sysdeps/ia64/fpu/e_sqrtf.S
+++ b/sysdeps/ia64/fpu/e_sqrtf.S
@ -204,6 +204,7 @@ GLOBAL_IEEE754_ENTRY(sqrtf)
 //
 GLOBAL_IEEE754_END(sqrtf)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mii
--- a/sysdeps/ia64/fpu/e_sqrtl.S
+++ b/sysdeps/ia64/fpu/e_sqrtl.S
@ -221,6 +221,7 @@ alloc r32= ar.pfs,0,5,4,0

 // END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
 GLOBAL_IEEE754_END(sqrtl)
+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/gen_import_file_list
+++ b/sysdeps/ia64/fpu/gen_import_file_list
@ -16,8 +16,18 @@ import_c() {
 	echo "$1 $libm_dir/$2 $3"
 }

-import_c DUMMY libm_support.h libm_support.h
-import_c DUMMY libm_error.c libm_error.c
+dummy_files="
+libm_cpu_defs.h
+libm_error_codes.h
+libm_support.h
+libm_error.c
+"
+
+for f in $dummy_files  # unquoted on purpose: word splitting yields one name per iteration
+do
+  import_c DUMMY $f $f  # emits a "DUMMY $libm_dir/<file> <file>" list entry (same name as source and destination)
+done
+
 import_c scalblnf scalblnf.c s_scalblnf.c

 for f in acos acosh asin atanh cosh exp2 exp10 fmod log2 pow remainder \
--- a/sysdeps/ia64/fpu/import_file.awk
+++ b/sysdeps/ia64/fpu/import_file.awk
@ -7,9 +7,12 @@ BEGIN {
 	getline;
 	while (!match($0, "^// WARRANTY DISCLAIMER")) {
 		print;
-		getline;
+		if (!getline) {
+			break;
 		}
-	getline;
+	}
+	if (getline)
+	{
 		printf								      \
 "// Redistribution and use in source and binary forms, with or without\n"     \
 "// modification, are permitted provided that the following conditions are\n" \
@ -31,6 +34,7 @@ BEGIN {
 			} while (getline);
 		}
 	}
+}

 /^[.]data/ {
 	print "RODATA";
@ -115,7 +119,6 @@ BEGIN {
 			print
 			getline;
 		}
-		getline;
 		printf "%s_END(%s)\n", type, name;
 		if (match(name, "^exp10[fl]?$")) {
 			t=substr(name,6)
--- a/sysdeps/ia64/fpu/import_intel_libm
+++ b/sysdeps/ia64/fpu/import_intel_libm
@ -16,6 +16,7 @@ import_s() {
 	# $2 = source file-name
 	# $3 = destination file-name
 	echo "Importing $1 from $2 -> $3"
+	rm -f $3
 	awk -f import_file.awk FUNC=$1 $2 > $3
 }

@ -24,19 +25,82 @@ import_c() {
 	# $2 = source file-name
 	# $3 = destination file-name
 	echo "Importing $1 from $2 -> $3"
+	rm -f $3
 	awk -f import_file.awk LICENSE_ONLY=y $2 > $3
 }

 do_imports() {
    while read func_pattern src_file dst_file; do
-	if [ "$(expr $src_file : '.*\(c\)$')" = "c" ]; then
+        case $src_file in  # dispatch on the source file's extension
+	*.[ch])  # C sources and headers go through import_c
 	    import_c "$func_pattern" "$src_file" "$dst_file"
-	else
+	    ;;
+	*)  # everything else goes through import_s
 	    import_s "$func_pattern" "$src_file" "$dst_file"
-	fi
+	    ;;
+	esac
    done
 }

 ./gen_import_file_list $libm_dir > import_file_list

 do_imports < import_file_list
+
+emptyfiles="
+e_gamma_r.c
+e_gammaf_r.c
+e_gammal_r.c
+s_sincos.c
+s_sincosf.c
+s_sincosl.c
+t_exp.c
+w_acosh.c
+w_acoshf.c
+w_acoshl.c
+w_atanh.c
+w_atanhf.c
+w_atanhl.c
+w_exp10.c
+w_exp10f.c
+w_exp10l.c
+w_exp2.c
+w_exp2f.c
+w_exp2l.c
+w_expl.c
+w_lgamma_r.c
+w_lgammaf_r.c
+w_lgammal_r.c
+w_log2.c
+w_log2f.c
+w_log2l.c
+w_sinh.c
+w_sinhf.c
+w_sinhl.c
+"
+for f in $emptyfiles  # unquoted on purpose: word splitting yields one file name per iteration
+do
+  rm -f $f  # drop any existing file before recreating it
+  echo "/* Not needed. */" > $f  # leave a one-line placeholder in its place
+done
+
+removedfiles="
+libm_atan2_reg.S
+s_ldexp.S
+s_ldexpf.S
+s_ldexpl.S
+s_scalbn.S
+s_scalbnf.S
+s_scalbnl.S
+"
+
+rm -f $removedfiles
+
+for f in lgammaf_r.c lgammal_r.c lgamma_r.c  # reentrant lgamma sources are written to e_<name>
+do
+  import_c $f $libm_dir/$f e_$f  # first argument is only echoed by import_c in its progress message
+done
+
+for f in lgamma.c lgammaf.c lgammal.c  # non-reentrant lgamma sources are written to w_<name>
+do
+  import_c $f $libm_dir/$f w_$f  # same import path as above, different destination prefix
+done
--- a/sysdeps/ia64/fpu/libm_error.c
+++ b/sysdeps/ia64/fpu/libm_error.c
@ -1,7 +1,7 @@
 /* file: libm_error.c */


-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -69,7 +69,22 @@
 //           Added code for tgamma
 // 04/11/03: Corrected POSIX/SVID/XOPEN branches for gamma/lgamma
 //           to return EDOM for neg ints.
-//
+// 09/08/03: Corrected XOPEN/SVID result for pow overflow with neg x, pos y.
+// 10/14/03: Added ILP32 ifdef
+// 12/12/03: Corrected XOPEN/SVID results for powf_zero_to_negative,
+//           powl_neg_to_non_integer, atan2f_zero, atan2df_zero,
+//           acoshf_lt_one, acosh_lt_one.
+// 12/07/04: Cast name strings as char *.
+// 12/08/04: Corrected POSIX behavior for atan2_zero, acos_gt_one, asin_gt_one,
+//           log_negative, log10_negative, log1p_negative, and log2_negative.
+//           Added SVID and XOPEN case log2l_zero.
+// 12/13/04: Corrected POSIX behavior for exp2_overflow, exp2_underflow,
+//           exp10_overflow, exp10_underflow.  Added ISOC to set errno for
+//           exp10_underflow.
+// 12/14/04: Corrected POSIX behavior for nextafter_overflow, 
+//           nextafter_underflow, nexttoward_overflow, nexttoward_underflow.  
+//           Added ISOC to set errno for nextafter and nexttoward underflow.
+// 12/15/04: Corrected POSIX behavior for exp, exp2, and exp10 underflow.

 #include <errno.h>
 #include <stdio.h>
@ -127,7 +142,7 @@ struct exception  exc;
 struct exceptionf excf;
 struct exceptionl excl;

-# if defined(__GNUC__)
+# ifdef __GNUC__
 #define ALIGNIT __attribute__ ((__aligned__ (16)))
 # elif defined opensource
 #define ALIGNIT
@ -138,47 +153,55 @@ struct exceptionl excl;
 # ifdef SIZE_LONG_INT_64
 #define __INT_64__ signed long
 # else
+# if ILP32
+#define __INT_64__ signed long long
+# else
 #define __INT_64__ __int64
 # endif
+# endif

-const char float_inf[4] = {0x00,0x00,0x80,0x7F};
-const char float_huge[4] = {0xFF,0xFF,0x7F,0x7F};
-const char float_zero[4] = {0x00,0x00,0x00,0x00};
-const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF};
-const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF};
-const char float_neg_zero[4] = {0x00,0x00,0x00,0x80};
+
+#define STATIC static /* storage class used on the constant byte tables that follow */
+
+STATIC const char float_inf[4] = {0x00,0x00,0x80,0x7F};
+STATIC const char float_huge[4] = {0xFF,0xFF,0x7F,0x7F};
+STATIC const char float_zero[4] = {0x00,0x00,0x00,0x00};
+STATIC const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF};
+STATIC const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF};
+STATIC const char float_neg_zero[4] = {0x00,0x00,0x00,0x80};
 ALIGNIT
-const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F}; 
-#if 0 /* unused */
+STATIC const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F};
+#ifndef _LIBC
 ALIGNIT
-const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
+STATIC const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
 #endif
 ALIGNIT
-const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+STATIC const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF}; 
-#if 0 /* unused */
+STATIC const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF};
+#ifndef _LIBC
 ALIGNIT
-const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
+STATIC const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
 #endif
 ALIGNIT
-const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
+STATIC const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
 ALIGNIT
-const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00}; 
-#if 0 /* unused */
+STATIC const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
+#ifndef _LIBC
+STATIC const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
 #endif
 ALIGNIT
-const char long_double_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+STATIC const char long_double_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00}; 
-#if 0 /* unused */
+STATIC const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
+#ifndef _LIBC
+STATIC const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
 #endif
 ALIGNIT
-const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
+STATIC const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
+

 #define RETVAL_HUGE_VALL *(long double *)retval =  *(long double *)long_double_inf
 #define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf
@ -195,6 +218,10 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
 #define RETVAL_HUGEF *(float *)retval = *(float *) float_huge
 #define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge

+#define ZEROL_VALUE *(long double *)long_double_zero
+#define ZEROD_VALUE *(double *)double_zero
+#define ZEROF_VALUE *(float *)float_zero
+
 #define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero
 #define RETVAL_ZEROD *(double *)retval = *(double *)double_zero
 #define RETVAL_ZEROF *(float *)retval = *(float *)float_zero
@ -254,6 +281,7 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
 #define INPUT_RESL (*(long double *)retval)
 #define INPUT_RESD (*(double *)retval)
 #define INPUT_RESF (*(float *)retval)
+#define INPUT_RESI64 (*(__INT_64__ *)retval)

 #define WRITEL_LOG_ZERO fputs("logl: SING error\n",stderr)
 #define WRITED_LOG_ZERO fputs("log: SING error\n",stderr)
@ -271,7 +299,7 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
 #define WRITED_Y1_ZERO fputs("y1: DOMAIN error\n",stderr)
 #define WRITEF_Y1_ZERO fputs("y1f: DOMAIN error\n",stderr)
 #define WRITEL_Y1_NEGATIVE fputs("y1l: DOMAIN error\n",stderr)
-#define WRITED_Y1_NEGATIUE fputs("y1: DOMAIN error\n",stderr)
+#define WRITED_Y1_NEGATIVE fputs("y1: DOMAIN error\n",stderr)
 #define WRITEF_Y1_NEGATIVE fputs("y1f: DOMAIN error\n",stderr)
 #define WRITEL_YN_ZERO fputs("ynl: DOMAIN error\n",stderr)
 #define WRITED_YN_ZERO fputs("yn: DOMAIN error\n",stderr)
@ -339,9 +367,9 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
 #define WRITEL_GAMMA_NEGATIVE fputs("gammal: SING error\n",stderr)
 #define WRITED_GAMMA_NEGATIVE fputs("gamma: SING error\n",stderr)
 #define WRITEF_GAMMA_NEGATIVE fputs("gammaf: SING error\n",stderr)
-#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: DOMAIN error\n",stderr)
-#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: DOMAIN error\n",stderr)
-#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: DOMAIN error\n",stderr)
+#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: SING error\n",stderr)
+#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: SING error\n",stderr)
+#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: SING error\n",stderr)
 #define WRITEL_J0_TLOSS  fputs("j0l: TLOSS error\n",stderr)
 #define WRITEL_Y0_TLOSS  fputs("y0l: TLOSS error\n",stderr)
 #define WRITEL_J1_TLOSS  fputs("j1l: TLOSS error\n",stderr)
@ -428,9 +456,15 @@ else if(_LIB_VERSIONIMF==_ISOC_)
    case nextafterl_overflow:
    case nextafter_overflow:
    case nextafterf_overflow:
+    case nextafterl_underflow:
+    case nextafter_underflow:
+    case nextafterf_underflow:
    case nexttowardl_overflow:
    case nexttoward_overflow:
    case nexttowardf_overflow:
+    case nexttowardl_underflow:
+    case nexttoward_underflow:
+    case nexttowardf_underflow:
    case scalbnl_overflow:
    case scalbn_overflow:
    case scalbnf_overflow:
@ -496,6 +530,7 @@ else if(_LIB_VERSIONIMF==_ISOC_)
    }
    case powl_underflow:
    case expl_underflow:
+    case exp10l_underflow:
    case exp2l_underflow:
    case scalbl_underflow:
    case scalbnl_underflow:
@ -505,11 +540,14 @@ else if(_LIB_VERSIONIMF==_ISOC_)
    case annuityl_underflow:
    case compoundl_underflow:
    {
-       if ( *(__INT_64__*)retval == 0 ) ERRNO_RANGE; 
+       /* Test for zero by testing 64 significand bits for zero. An integer
+          test is needed so denormal flag is not set by a floating-point test */
+       if ( INPUT_RESI64 == 0 ) ERRNO_RANGE;
       break;
    }
    case pow_underflow:
    case exp_underflow:
+    case exp10_underflow:
    case exp2_underflow:
    case scalb_underflow:
    case scalbn_underflow:
@ -519,11 +557,14 @@ else if(_LIB_VERSIONIMF==_ISOC_)
    case annuity_underflow:
    case compound_underflow:
    {
-       if ( ((*(__INT_64__*)retval)<<1) == 0 ) ERRNO_RANGE; 
+       /* Test for zero by testing exp and significand bits for zero. An integer
+          test is needed so denormal flag is not set by a floating-point test */
+       if ( (INPUT_RESI64 << 1) == 0 ) ERRNO_RANGE;
       break;
    }
    case powf_underflow:
    case expf_underflow:
+    case exp10f_underflow:
    case exp2f_underflow:
    case scalbf_underflow:
    case scalbnf_underflow:
@ -533,7 +574,9 @@ else if(_LIB_VERSIONIMF==_ISOC_)
    case annuityf_underflow:
    case compoundf_underflow:
    {
-       if ( ((*(__INT_64__*)retval)<<33) == 0 ) ERRNO_RANGE; 
+       /* Test for zero by testing exp and significand bits for zero. An integer
+          test is needed so denormal flag is not set by a floating-point test */
+       if ( (INPUT_RESI64 << 33) == 0 ) ERRNO_RANGE;
       break;
    }
    case logl_negative:
@ -656,10 +699,10 @@ switch(input_tag)
       RETVAL_HUGE_VALF; ERRNO_RANGE; break;
  }
  case gammal_negative:
-  case lgammal_negative:
  case gamma_negative:
-  case lgamma_negative:
  case gammaf_negative:
+  case lgammal_negative:
+  case lgamma_negative:
  case lgammaf_negative:
  case tgammal_negative:
  case tgamma_negative:
@ -697,6 +740,18 @@ switch(input_tag)
  case sinhcoshl_overflow:
  case sinhcosh_overflow:
  case sinhcoshf_overflow:
+  case nextafterl_overflow:
+  case nextafter_overflow:
+  case nextafterf_overflow:
+  case nextafterl_underflow:
+  case nextafter_underflow:
+  case nextafterf_underflow:
+  case nexttowardl_overflow:
+  case nexttoward_overflow:
+  case nexttowardf_overflow:
+  case nexttowardl_underflow:
+  case nexttoward_underflow:
+  case nexttowardf_underflow:
  {
       ERRNO_RANGE; break;
  }
@ -767,7 +822,10 @@ switch(input_tag)
    /* y1l(x < 0) */
    /* ynl(x < 0) */
    {
-       RETVAL_NEG_HUGE_VALL; ERRNO_DOMAIN; break;
+#ifndef _LIBC
+       RETVAL_NEG_HUGE_VALL;
+#endif
+       ERRNO_DOMAIN; break;
    }
  case y0_negative:
  case y1_negative:
@ -792,8 +850,9 @@ switch(input_tag)
  case log10l_zero:
  case log2l_zero:
    /* logl(0) */
-    /* log1pl(0) */
+    /* log1pl(-1) */
    /* log10l(0) */
+    /* log2l(0) */
    {
       RETVAL_NEG_HUGE_VALL; ERRNO_RANGE; break;
    }
@ -802,8 +861,9 @@ switch(input_tag)
  case log10_zero:
  case log2_zero:
   /* log(0) */
-   /* log1p(0) */
+   /* log1p(-1) */
   /* log10(0) */
+   /* log2(0) */
    {
       RETVAL_NEG_HUGE_VALD; ERRNO_RANGE; break;
    }
@ -812,8 +872,9 @@ switch(input_tag)
  case log10f_zero:
  case log2f_zero:
    /* logf(0) */
-    /* log1pf(0) */
+    /* log1pf(-1) */
    /* log10f(0) */
+    /* log2f(0) */
    {
       RETVAL_NEG_HUGE_VALF; ERRNO_RANGE; break;
    }
@ -822,12 +883,10 @@ switch(input_tag)
  case log10l_negative:
  case log2l_negative:
    /* logl(x < 0) */
-    /* log1pl(x < 0) */
+    /* log1pl(x < -1) */
    /* log10l(x < 0) */
+    /* log2l(x < 0) */
    {
-#ifndef _LIBC
-       RETVAL_NEG_HUGE_VALL;
-#endif
       ERRNO_DOMAIN; break;
    }
  case log_negative:
@ -835,12 +894,10 @@ switch(input_tag)
  case log10_negative:
  case log2_negative:
    /* log(x < 0) */
-    /* log1p(x < 0) */
+    /* log1p(x < -1) */
    /* log10(x < 0) */
+    /* log2(x < 0) */
    {
-#ifndef _LIBC
-       RETVAL_NEG_HUGE_VALD;
-#endif
       ERRNO_DOMAIN; break;
    }
  case logf_negative:
@ -848,52 +905,63 @@ switch(input_tag)
  case log10f_negative:
  case log2f_negative:
    /* logf(x < 0) */
-    /* log1pf(x < 0) */
+    /* log1pf(x < -1) */
    /* log10f(x < 0) */
+    /* log2f(x < 0) */
    {
-#ifndef _LIBC
-       RETVAL_NEG_HUGE_VALF;
-#endif
       ERRNO_DOMAIN; break;
    }
  case expl_overflow:
-  case exp2l_overflow:
  case exp10l_overflow:
+  case exp2l_overflow:
    /* expl overflow */
+    /* exp10l overflow */
+    /* exp2l overflow */
    {
       RETVAL_HUGE_VALL; ERRNO_RANGE; break;
    }
  case exp_overflow:
-  case exp2_overflow:
  case exp10_overflow:
+  case exp2_overflow:
    /* exp overflow */
+    /* exp10 overflow */
+    /* exp2 overflow */
    {
       RETVAL_HUGE_VALD; ERRNO_RANGE; break;
    }
  case expf_overflow:
-  case exp2f_overflow:
  case exp10f_overflow:
+  case exp2f_overflow:
    /* expf overflow */
    {
       RETVAL_HUGE_VALF; ERRNO_RANGE; break;
    }
  case expl_underflow:
+  case exp10l_underflow:
  case exp2l_underflow:
    /* expl underflow */
+    /* exp10l underflow */
+    /* exp2l underflow */
    {
-       RETVAL_ZEROL; ERRNO_RANGE; break;
+       ERRNO_RANGE; break;
    }
  case exp_underflow:
+  case exp10_underflow:
  case exp2_underflow:
    /* exp underflow */
+    /* exp10 underflow */
+    /* exp2 underflow */
    {
-       RETVAL_ZEROD; ERRNO_RANGE; break;
+       ERRNO_RANGE; break;
    }
  case expf_underflow:
+  case exp10f_underflow:
  case exp2f_underflow:
    /* expf underflow */
+    /* exp10f underflow */
+    /* exp2f underflow */
    {
-       RETVAL_ZEROF; ERRNO_RANGE; break;
+       ERRNO_RANGE; break;
    }
  case j0l_gt_loss:
  case y0l_gt_loss:
@ -945,7 +1013,7 @@ switch(input_tag)
  case compoundl_overflow:
    /* powl(x,y) overflow */
    {
-       if (INPUT_RESL < 0) RETVAL_NEG_HUGE_VALL;
+       if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
       else RETVAL_HUGE_VALL;
       ERRNO_RANGE; break;
    }
@ -954,7 +1022,7 @@ switch(input_tag)
  case compound_overflow:
    /* pow(x,y) overflow */
    {
-       if (INPUT_RESD < 0) RETVAL_NEG_HUGE_VALD;
+       if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
       else RETVAL_HUGE_VALD;
       ERRNO_RANGE; break;
    }
@ -963,7 +1031,7 @@ switch(input_tag)
  case compoundf_overflow:
    /* powf(x,y) overflow */
    {
-       if (INPUT_RESF < 0) RETVAL_NEG_HUGE_VALF;
+       if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
       else RETVAL_HUGE_VALF;
       ERRNO_RANGE; break;
    }
@ -1051,51 +1119,24 @@ switch(input_tag)
    }
  case atan2l_zero:
  case atan2dl_zero:
+    /* atan2l(0,0) */
    /* atan2dl(0,0) */
    {
-#ifndef _LIBC
-       RETVAL_ZEROL;
-#else
-       /* XXX  arg1 and arg2 are switched!!!!  */
-       if (signbit (*(long double *) arg1))
-	 /* y == -0 */
-	 *(long double *) retval = __libm_copysignl (M_PIl, *(long double *) arg2);
-       else
-	 *(long double *) retval = *(long double *) arg2;
-#endif
-       ERRNO_DOMAIN; break;
+       break;
    }
  case atan2_zero:
  case atan2d_zero:
+    /* atan2(0,0) */
    /* atan2d(0,0) */
    {
-#ifndef _LIBC
-       RETVAL_ZEROD;
-#else
-       /* XXX  arg1 and arg2 are switched!!!!  */
-       if (signbit (*(double *) arg1))
-	 /* y == -0 */
-	 *(double *) retval = __libm_copysign (M_PI, *(double *) arg2);
-       else
-	 *(double *) retval = *(double *) arg2;
-#endif
-       ERRNO_DOMAIN; break;
+       break;
    }
  case atan2f_zero:
  case atan2df_zero:
    /* atan2f(0,0) */
    /* atan2df(0,0) */
    {
-#ifndef _LIBC
-       RETVAL_ZEROF;
-#else
-       if (signbit (*(float *) arg2))
-	 /* y == -0 */
-	 *(float *) retval = __libm_copysignf (M_PI, *(float *) arg1);
-       else
-	 *(float *) retval = *(float *) arg1;
-#endif
-       ERRNO_DOMAIN; break;
+       break;
    }
  case expm1l_overflow:
    /* expm1 overflow */
@ -1145,42 +1186,42 @@ switch(input_tag)
  case scalbl_underflow:
    /* scalbl underflow */
    {
-       if (INPUT_XL < 0) RETVAL_NEG_ZEROL; 
+       if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_ZEROL;
       else RETVAL_ZEROL;
       ERRNO_RANGE; break;
    }
  case scalb_underflow:
    /* scalb underflow */
    {
-       if (INPUT_XD < 0) RETVAL_NEG_ZEROD; 
+       if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_ZEROD;
       else RETVAL_ZEROD;
       ERRNO_RANGE; break;
    }
  case scalbf_underflow:
    /* scalbf underflow */
    {
-       if (INPUT_XF < 0) RETVAL_NEG_ZEROF; 
+       if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_ZEROF;
       else RETVAL_ZEROF;
       ERRNO_RANGE; break;
    }
  case scalbl_overflow:
    /* scalbl overflow */
    {
-       if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL; 
+       if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
       else RETVAL_HUGE_VALL;
       ERRNO_RANGE; break;
    }
  case scalb_overflow:
    /* scalb overflow */
    {
-       if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD; 
+       if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
       else RETVAL_HUGE_VALD;
       ERRNO_RANGE; break;
    }
  case scalbf_overflow:
    /* scalbf overflow */
    {
-       if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF; 
+       if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
       else RETVAL_HUGE_VALF;
       ERRNO_RANGE; break;
    }
@ -1204,9 +1245,6 @@ switch(input_tag)
    /* acosl(x > 1) */
    /* acosdl(x > 1) */
    {
-#ifndef _LIBC
-       RETVAL_ZEROL;
-#endif
       ERRNO_DOMAIN; break;
    }
  case acos_gt_one:
@ -1214,9 +1252,6 @@ switch(input_tag)
    /* acos(x > 1) */
    /* acosd(x > 1) */
    {
-#ifndef _LIBC
-       RETVAL_ZEROD;
-#endif
       ERRNO_DOMAIN; break;
    }
  case acosf_gt_one:
@ -1224,9 +1259,6 @@ switch(input_tag)
    /* acosf(x > 1) */
    /* acosdf(x > 1) */
    {
-#ifndef _LIBC
-       RETVAL_ZEROF;
-#endif
       ERRNO_DOMAIN; break;
    }
  case asinl_gt_one:
@ -1234,9 +1266,6 @@ switch(input_tag)
    /* asinl(x > 1) */
    /* asindl(x > 1) */
    {
-#ifndef _LIBC
-       RETVAL_ZEROL;
-#endif
       ERRNO_DOMAIN; break;
    }
  case asin_gt_one:
@ -1244,18 +1273,13 @@ switch(input_tag)
    /* asin(x > 1) */
    /* asind(x > 1) */
    {
-#ifndef _LIBC
-       RETVAL_ZEROD;
-#endif
       ERRNO_DOMAIN; break;
    }
  case asinf_gt_one:
  case asindf_gt_one:
+    /* asinf(x > 1) */
    /* asindf(x > 1) */
    {
-#ifndef _LIBC
-       RETVAL_ZEROF;
-#endif
       ERRNO_DOMAIN; break;
    }
  case remainderl_by_zero:
@ -1291,33 +1315,24 @@ switch(input_tag)
    {
       RETVAL_HUGE_VALF; ERRNO_RANGE; break;
    }
-  case nextafterl_overflow:
-  case nextafter_overflow:
-  case nextafterf_overflow:
-  case nexttowardl_overflow:
-  case nexttoward_overflow:
-  case nexttowardf_overflow:
-    {
-      ERRNO_RANGE; break;
-    }
  case sinhl_overflow:
    /* sinhl overflows */
    {
-       if (INPUT_XL > 0) RETVAL_HUGE_VALL;
+       if (INPUT_XL > ZEROL_VALUE /*0*/) RETVAL_HUGE_VALL;
       else RETVAL_NEG_HUGE_VALL;
       ERRNO_RANGE; break;
    }
  case sinh_overflow:
    /* sinh overflows */
    {
-       if (INPUT_XD > 0) RETVAL_HUGE_VALD;
+       if (INPUT_XD > ZEROD_VALUE /*0*/) RETVAL_HUGE_VALD;
       else RETVAL_NEG_HUGE_VALD;
       ERRNO_RANGE; break;
    }
  case sinhf_overflow:
    /* sinhf overflows */
    {
-       if (INPUT_XF > 0) RETVAL_HUGE_VALF;
+       if (INPUT_XF > ZEROF_VALUE /*0*/) RETVAL_HUGE_VALF;
       else RETVAL_NEG_HUGE_VALF;
       ERRNO_RANGE; break;
    }
@ -1862,6 +1877,27 @@ else
       *(float *)retval = excf.retval;
       break;
    }
+  case log2l_zero:
+    /* log2l(0) */
+    {
+       SINGL; NAMEL = (char *) "log2l";
+       ifSVID
+       {
+         RETVAL_NEG_HUGEL;
+         NOT_MATHERRL
+         {
+           WRITEL_LOG2_ZERO;
+           ERRNO_DOMAIN;
+         }
+       }
+       else
+       {
+         RETVAL_NEG_HUGE_VALL;
+         NOT_MATHERRL {ERRNO_DOMAIN;}
+       }
+       *(long double *)retval = excl.retval;
+       break;
+    }
  case log2_zero:
    /* log2(0) */
    {
@ -2096,12 +2132,12 @@ else
       OVERFLOWL; NAMEL = (char *) "powl";
       ifSVID
       {
-         if (INPUT_XL < 0)  RETVAL_NEG_HUGEL;
+         if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGEL;
         else RETVAL_HUGEL;
       }
       else
       {
-         if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
+         if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
         else RETVAL_HUGE_VALL;
       }
       NOT_MATHERRL {ERRNO_RANGE;}
@ -2114,12 +2150,12 @@ else
       OVERFLOWD; NAMED = (char *) "pow";
       ifSVID
       {
-         if (INPUT_XD < 0) RETVAL_NEG_HUGED;
+         if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGED;
         else RETVAL_HUGED;
       }
       else
       {
-         if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
+         if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
         else RETVAL_HUGE_VALD;
       }
       NOT_MATHERRD {ERRNO_RANGE;}
@ -2132,12 +2168,12 @@ else
       OVERFLOWF; NAMEF = (char *) "powf";
       ifSVID
       {
-         if (INPUT_XF < 0) RETVAL_NEG_HUGEF;
+         if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGEF;
         else RETVAL_HUGEF;
       }
       else
       {
-         if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
+         if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
         else RETVAL_HUGE_VALF;
       }
       NOT_MATHERRF {ERRNO_RANGE;}
@ -2214,7 +2250,6 @@ else
    /* 0**neg */
    {
       DOMAINF; NAMEF = (char *) "powf";
-       RETVAL_NEG_HUGE_VALF;
       ifSVID
       {
         RETVAL_ZEROF;
@ -2238,7 +2273,7 @@ else
       DOMAINL; NAMEL = (char *) "powl";
       ifSVID
       {
-         RETVAL_ZEROF;
+         RETVAL_ZEROL;
         NOT_MATHERRL
         {
           WRITEL_POW_NEG_TO_NON_INTEGER;
@ -2360,11 +2395,13 @@ else
       DOMAINF; NAMEF = (char *) "atan2f";
       RETVAL_ZEROF;
       NOT_MATHERRF
+       {
         ifSVID
         {
            WRITEF_ATAN2_ZERO_BY_ZERO;
         }
         ERRNO_DOMAIN;
+       }
       *(float *)retval = excf.retval;
       break;
    }
@ -2406,11 +2443,13 @@ else
       DOMAINF; NAMEF = (char *) "atan2df";
       RETVAL_ZEROF;
       NOT_MATHERRF
+       {
         ifSVID
         {
            WRITEF_ATAN2D_ZERO_BY_ZERO;
         }
         ERRNO_DOMAIN;
+       }
       *(float *)retval = excf.retval;
       break;
    }
@ -2446,7 +2485,7 @@ else
    /* scalbl underflow */
    {
       UNDERFLOWL; NAMEL = (char *) "scalbl";
-       if (INPUT_XL < 0.0L) RETVAL_NEG_ZEROL;
+       if (INPUT_XL < ZEROL_VALUE /*0.0L*/) RETVAL_NEG_ZEROL;
       else  RETVAL_ZEROL;
       NOT_MATHERRL {ERRNO_RANGE;}
       *(long double *)retval = excl.retval;
@ -2456,7 +2495,7 @@ else
    /* scalb underflow */
    {
       UNDERFLOWD; NAMED = (char *) "scalb";
-       if (INPUT_XD < 0.0) RETVAL_NEG_ZEROD;
+       if (INPUT_XD < ZEROD_VALUE /*0.0*/) RETVAL_NEG_ZEROD;
       else  RETVAL_ZEROD;
       NOT_MATHERRD {ERRNO_RANGE;}
       *(double *)retval = exc.retval;
@ -2466,7 +2505,7 @@ else
    /* scalbf underflow */
    {
       UNDERFLOWF; NAMEF = (char *) "scalbf";
-       if (INPUT_XF < 0.0) RETVAL_NEG_ZEROF;
+       if (INPUT_XF < ZEROF_VALUE /*0.0*/) RETVAL_NEG_ZEROF;
       else  RETVAL_ZEROF;
       NOT_MATHERRF {ERRNO_RANGE;}
       *(float *)retval = excf.retval;
@ -2476,7 +2515,7 @@ else
    /* scalbl overflow */
    {
       OVERFLOWL; NAMEL = (char *) "scalbl";
-       if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
+       if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
       else RETVAL_HUGE_VALL;
       NOT_MATHERRL {ERRNO_RANGE;}
       *(long double *)retval = excl.retval;
@ -2486,7 +2525,7 @@ else
    /* scalb overflow */
    {
       OVERFLOWD; NAMED = (char *) "scalb";
-       if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
+       if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
       else RETVAL_HUGE_VALD;
       NOT_MATHERRD {ERRNO_RANGE;}
       *(double *)retval = exc.retval;
@ -2496,7 +2535,7 @@ else
    /* scalbf overflow */
    {
       OVERFLOWF; NAMEF = (char *) "scalbf";
-       if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
+       if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
       else RETVAL_HUGE_VALF;
       NOT_MATHERRF {ERRNO_RANGE;}
       *(float *)retval = excf.retval;
@ -2844,12 +2883,12 @@ else
      OVERFLOWL; NAMEL = (char *) "sinhl";
      ifSVID
      {
-        if (INPUT_XL > 0.0) RETVAL_HUGEL;
+        if (INPUT_XL > ZEROL_VALUE /*0.0*/) RETVAL_HUGEL;
        else RETVAL_NEG_HUGEL;
      }
      else
      {
-        if (INPUT_XL > 0.0) RETVAL_HUGE_VALL;
+        if (INPUT_XL > ZEROL_VALUE /*0.0*/) RETVAL_HUGE_VALL;
        else RETVAL_NEG_HUGE_VALL;
      }
      NOT_MATHERRL {ERRNO_RANGE;}
@ -2862,12 +2901,12 @@ else
      OVERFLOWD; NAMED = (char *) "sinh";
      ifSVID
      {
-        if (INPUT_XD > 0.0) RETVAL_HUGED;
+        if (INPUT_XD > ZEROD_VALUE /*0.0*/) RETVAL_HUGED;
        else RETVAL_NEG_HUGED;
      }
      else
      {
-        if (INPUT_XD > 0.0) RETVAL_HUGE_VALD;
+        if (INPUT_XD > ZEROD_VALUE /*0.0*/) RETVAL_HUGE_VALD;
        else RETVAL_NEG_HUGE_VALD;
      }
      NOT_MATHERRD {ERRNO_RANGE;}
@ -2880,12 +2919,12 @@ else
      OVERFLOWF; NAMEF = (char *) "sinhf";
      ifSVID
      {
-        if( INPUT_XF > 0.0) RETVAL_HUGEF;
+        if (INPUT_XF > ZEROF_VALUE /*0.0*/) RETVAL_HUGEF;
        else RETVAL_NEG_HUGEF;
      }
      else
      {
-        if (INPUT_XF > 0.0) RETVAL_HUGE_VALF;
+        if (INPUT_XF > ZEROF_VALUE /*0.0*/) RETVAL_HUGE_VALF;
        else RETVAL_NEG_HUGE_VALF;
      }
      NOT_MATHERRF {ERRNO_RANGE;}
@ -2919,7 +2958,7 @@ else
       {
         NOT_MATHERRD
         {
-          WRITEL_ACOSH;
+          WRITED_ACOSH;
          ERRNO_DOMAIN;
         }
       }
@ -2947,7 +2986,7 @@ else
         NOT_MATHERRF {ERRNO_DOMAIN;}
       }
       *(float *)retval = excf.retval;
-       ERRNO_DOMAIN; break;
+       break;
    }
  case atanhl_gt_one:
    /* atanhl(|x| > 1) */
@ -3225,7 +3264,7 @@ else
         RETVAL_HUGEL;
         NOT_MATHERRL
         {
-            WRITEL_GAMMA_NEGATIVE;
+           WRITEL_LGAMMA_NEGATIVE;
           ERRNO_DOMAIN;
         }
       }
@ -3801,7 +3840,8 @@ else
           WRITEF_Y1_ZERO;
           ERRNO_DOMAIN;
         }
-       }else
+       }
+       else
       {
         RETVAL_NEG_HUGE_VALF;
         NOT_MATHERRF {ERRNO_DOMAIN;}
@ -4025,7 +4065,7 @@ else
         RETVAL_NEG_HUGED;
         NOT_MATHERRD
         {
-           WRITED_Y1_NEGATIUE;
+           WRITED_Y1_NEGATIVE;
           ERRNO_DOMAIN;
         }
       }
--- a/sysdeps/ia64/fpu/libm_lgamma.S
+++ b/sysdeps/ia64/fpu/libm_lgamma.S
@ -47,6 +47,7 @@
 // 09/15/02  Fixed bug on the branch lgamma_negrecursion
 // 10/21/02  Now it returns SIGN(GAMMA(x))=-1 for negative zero
 // 02/10/03  Reordered header: .section, .global, .proc, .align
+// 07/22/03  Reformatted some data tables
 //
 //*********************************************************************
 //
@ -951,19 +952,32 @@ data8 0xD28D3312983E98A0,0xBFFF //S2
 //
 data8 0x8090F777D7942F73,0x4001 // PR01
 data8 0xE5B521193CF61E63,0x4000 // PR11
-data8 0xC02C000000001939,0x0000000000000233 // (-15;-14)
-data8 0xC02A000000016124,0x0000000000002BFB // (-14;-13)
-data8 0xC02800000011EED9,0x0000000000025CBB // (-13;-12)
-data8 0xC026000000D7322A,0x00000000001E1095 // (-12;-11)
-data8 0xC0240000093F2777,0x00000000013DD3DC // (-11;-10)
-data8 0xC02200005C7768FB,0x000000000C9539B9 // (-10;-9)
-data8 0xC02000034028B3F9,0x000000007570C565 // (-9;-8)
-data8 0xC01C0033FDEDFE1F,0x00000007357E670E // (-8;-7)
-data8 0xC018016B25897C8D,0x000000346DC5D639 // (-7;-6)
-data8 0xC014086A57F0B6D9,0x0000010624DD2F1B // (-6;-5)
-data8 0xC010284E78599581,0x0000051EB851EB85 // (-5;-4)
-data8 0xC009260DBC9E59AF,0x000028F5C28F5C29 // (-4;-3)
-data8 0xC003A7FC9600F86C,0x0000666666666666 // (-3;-2)
+data8 0xC02C000000001939 // (-15;-14)
+data8 0x0000000000000233 // (-15;-14)
+data8 0xC02A000000016124 // (-14;-13)
+data8 0x0000000000002BFB // (-14;-13)
+data8 0xC02800000011EED9 // (-13;-12)
+data8 0x0000000000025CBB // (-13;-12)
+data8 0xC026000000D7322A // (-12;-11)
+data8 0x00000000001E1095 // (-12;-11)
+data8 0xC0240000093F2777 // (-11;-10)
+data8 0x00000000013DD3DC // (-11;-10)
+data8 0xC02200005C7768FB // (-10;-9)
+data8 0x000000000C9539B9 // (-10;-9)
+data8 0xC02000034028B3F9 // (-9;-8)
+data8 0x000000007570C565 // (-9;-8)
+data8 0xC01C0033FDEDFE1F // (-8;-7)
+data8 0x00000007357E670E // (-8;-7)
+data8 0xC018016B25897C8D // (-7;-6)
+data8 0x000000346DC5D639 // (-7;-6)
+data8 0xC014086A57F0B6D9 // (-6;-5)
+data8 0x0000010624DD2F1B // (-6;-5)
+data8 0xC010284E78599581 // (-5;-4)
+data8 0x0000051EB851EB85 // (-5;-4)
+data8 0xC009260DBC9E59AF // (-4;-3)
+data8 0x000028F5C28F5C29 // (-4;-3)
+data8 0xC003A7FC9600F86C // (-3;-2)
+data8 0x0000666666666666 // (-3;-2)
 data8 0xCC15879606130890,0x4000 // PR21
 data8 0xB42FE3281465E1CC,0x4000 // PR31
 //
@ -971,19 +985,32 @@ data8 0x828185F0B95C9916,0x4001 // PR00
 //
 data8 0xD4D3C819E4E5654B,0x4000 // PR10
 data8 0xA82FBBA4FCC75298,0x4000 // PR20
-data8 0xC02DFFFFFFFFFE52,0x000000000000001C // (-15;-14)
-data8 0xC02BFFFFFFFFE6C7,0x00000000000001A6 // (-14;-13)
-data8 0xC029FFFFFFFE9EDC,0x0000000000002BFB // (-13;-12)
-data8 0xC027FFFFFFEE1127,0x000000000001EEC8 // (-12;-11)
-data8 0xC025FFFFFF28CDD4,0x00000000001E1095 // (-11;-10)
-data8 0xC023FFFFF6C0D7C0,0x000000000101B2B3 // (-10;-9)
-data8 0xC021FFFFA3884BD0,0x000000000D6BF94D // (-9;-8)
-data8 0xC01FFFF97F8159CF,0x00000000C9539B89 // (-8;-7)
-data8 0xC01BFFCBF76B86F0,0x00000007357E670E // (-7;-6)
-data8 0xC017FE92F591F40D,0x000000346DC5D639 // (-6;-5)
-data8 0xC013F7577A6EEAFD,0x00000147AE147AE1 // (-5;-4)
-data8 0xC00FA471547C2FE5,0x00000C49BA5E353F // (-4;-3)
-data8 0xC005FB410A1BD901,0x000053F7CED91687 // (-3;-2)
+data8 0xC02DFFFFFFFFFE52 // (-15;-14)
+data8 0x000000000000001C // (-15;-14)
+data8 0xC02BFFFFFFFFE6C7 // (-14;-13)
+data8 0x00000000000001A6 // (-14;-13)
+data8 0xC029FFFFFFFE9EDC // (-13;-12)
+data8 0x0000000000002BFB // (-13;-12)
+data8 0xC027FFFFFFEE1127 // (-12;-11)
+data8 0x000000000001EEC8 // (-12;-11)
+data8 0xC025FFFFFF28CDD4 // (-11;-10)
+data8 0x00000000001E1095 // (-11;-10)
+data8 0xC023FFFFF6C0D7C0 // (-10;-9)
+data8 0x000000000101B2B3 // (-10;-9)
+data8 0xC021FFFFA3884BD0 // (-9;-8)
+data8 0x000000000D6BF94D // (-9;-8)
+data8 0xC01FFFF97F8159CF // (-8;-7)
+data8 0x00000000C9539B89 // (-8;-7)
+data8 0xC01BFFCBF76B86F0 // (-7;-6)
+data8 0x00000007357E670E // (-7;-6)
+data8 0xC017FE92F591F40D // (-6;-5)
+data8 0x000000346DC5D639 // (-6;-5)
+data8 0xC013F7577A6EEAFD // (-5;-4)
+data8 0x00000147AE147AE1 // (-5;-4)
+data8 0xC00FA471547C2FE5 // (-4;-3)
+data8 0x00000C49BA5E353F // (-4;-3)
+data8 0xC005FB410A1BD901 // (-3;-2)
+data8 0x000053F7CED91687 // (-3;-2)
 data8 0x80151BB918A293AA,0x4000 // PR30
 data8 0xB3C9F8F47422A314,0x400B // PRN
 //
@ -3538,6 +3565,7 @@ lgamma_libm_err:
 };;
 GLOBAL_LIBM_END(__libm_lgamma)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/libm_lgammaf.S
+++ b/sysdeps/ia64/fpu/libm_lgammaf.S
@ -47,6 +47,7 @@
 // 09/16/02  Improved accuracy on intervals reduced to [1;1.25]
 // 10/21/02  Now it returns SIGN(GAMMA(x))=-1 for negative zero
 // 02/10/03  Reordered header: .section, .global, .proc, .align
+// 07/22/03  Reformatted some data tables
 //
 //*********************************************************************
 //
@ -685,19 +686,26 @@ data8 0x3FF1029A9DD542B4,0xBFFAD37C209D3B25 // A6,A5
 data8 0x405385E6FD9BE7EA // A0
 data8 0x478895F1C0000000 // Overflow boundary
 data8 0x400062D97D26B523,0xC00A03E1529FF023 // A6,A5
-data8 0x4069204C51E566CE,0 // A0
+data8 0x4069204C51E566CE // A0
+data8 0x0000000000000000 // pad
 data8 0x40101476B38FD501,0xC0199DE7B387C0FC // A6,A5
-data8 0x407EB8DAEC83D759,0 // A0
+data8 0x407EB8DAEC83D759 // A0
+data8 0x0000000000000000 // pad
 data8 0x401FDB008D65125A,0xC0296B506E665581 // A6,A5
-data8 0x409226D93107EF66,0 // A0
+data8 0x409226D93107EF66 // A0
+data8 0x0000000000000000 // pad
 data8 0x402FB3EAAF3E7B2D,0xC039521142AD8E0D // A6,A5
-data8 0x40A4EFA4F072792E,0 // A0
+data8 0x40A4EFA4F072792E // A0
+data8 0x0000000000000000 // pad
 data8 0x403FA024C66B2563,0xC0494569F250E691 // A6,A5
-data8 0x40B7B747C9235BB8,0 // A0
+data8 0x40B7B747C9235BB8 // A0
+data8 0x0000000000000000 // pad
 data8 0x404F9607D6DA512C,0xC0593F0B2EDDB4BC // A6,A5
-data8 0x40CA7E29C5F16DE2,0 // A0
+data8 0x40CA7E29C5F16DE2 // A0
+data8 0x0000000000000000 // pad
 data8 0x405F90C5F613D98D,0xC0693BD130E50AAF // A6,A5
-data8 0x40DD4495238B190C,0 // A0
+data8 0x40DD4495238B190C // A0
+data8 0x0000000000000000 // pad
 //
 // polynomial approximation of ln(sin(Pi*x)/(Pi*x)), |x| <= 0.5
 data8 0xBFD58731A486E820,0xBFA4452CC28E15A9 // S16,S14
@ -2133,6 +2141,7 @@ lgammaf_libm_err:
 };;
 GLOBAL_LIBM_END(__libm_lgammaf)

+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/libm_lgammal.S
+++ b/sysdeps/ia64/fpu/libm_lgammal.S
@ -7622,6 +7622,7 @@ lgammal_singularity:
 GLOBAL_LIBM_END(__libm_lgammal)


+
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/libm_scalblnf.S
+++ b/sysdeps/ia64/fpu/libm_scalblnf.S
@ -44,38 +44,51 @@
 // 02/06/02 Corrected to handle 32- or 64-bit integers
 // 05/20/02 Cleaned up namespace and sf0 syntax
 // 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/25/03 Improved performance
 //
 // API
 //==============================================================
-// float = __libm_scalblnf  (float x, long int n, int long_int_type) 
+// float __libm_scalblnf  (float x, long int n, int long_int_type) 
 // input  floating point f8 and long int n (r33) 
 // input  long_int_type = 0 if long int defined as 32 bits, = 1 if 64 bits
-// 
 // output floating point f8
 //
-
 // Returns x* 2**n using an fma and detects overflow
 // and underflow.
 //
 //
+// Strategy:
+//  Compute biased exponent of result exp_Result = N + exp_X
+//  Break into ranges:
+//   exp_Result > 0x1007e                 -> Certain overflow
+//   exp_Result = 0x1007e                 -> Possible overflow
+//   0x0ff81 <= exp_Result < 0x1007e      -> No over/underflow (main path)
+//   0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
+//   exp_Result < 0x0ff81 - 23            -> Certain underflow

 FR_Big         = f6
 FR_NBig        = f7
 FR_Floating_X  = f8
 FR_Result      = f8
 FR_Result2     = f9
-FR_Result3     = f11
-FR_Norm_X      = f12
-FR_Two_N       = f14
-FR_Two_to_Big  = f15
+FR_Result3     = f10
+FR_Norm_X      = f11
+FR_Two_N       = f12

+GR_neg_ov_limit= r14
 GR_N_Biased    = r15
 GR_Big         = r16
 GR_NBig        = r17
-GR_Scratch     = r18
-GR_Scratch1    = r19
+GR_exp_Result  = r18
+GR_pos_ov_limit= r19
 GR_Bias        = r20
 GR_N_as_int    = r21
+GR_signexp_X   = r22
+GR_exp_X       = r23
+GR_exp_mask    = r24
+GR_max_exp     = r25
+GR_min_exp     = r26
+GR_min_den_exp = r27

 GR_SAVE_B0          = r32
 GR_SAVE_GP          = r33
@ -93,105 +106,142 @@ GLOBAL_LIBM_ENTRY(__libm_scalblnf)
 //   Build the exponent Bias
 //
 {    .mfi
-     alloc         r32=ar.pfs,3,0,4,0
-     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
-     addl          GR_Bias = 0x0FFFF,r0
+     getf.exp      GR_signexp_X = FR_Floating_X // Get signexp of x
+     fclass.m      p6,p0 = FR_Floating_X, 0xe7  // @snan | @qnan | @inf | @zero
+     mov           GR_Bias = 0x0ffff
 }
-
-
 //
-//   Is N zero?
 //   Normalize x
-//   Do we need to sign extend input (long_int_type = 0)?
+//   Is long integer type 32 bits?
 //
 {    .mfi
-     cmp.eq.unc    p6,p0 = r33,r0  
+     mov           GR_Big = 35000      // If N this big then certain overflow
     fnorm.s1      FR_Norm_X = FR_Floating_X
-     cmp.eq.unc    p8,p9 = r34,r0  
+     cmp.eq        p8,p9 = r34,r0
 }
 ;;

-{    .mii
-(p9) mov           GR_N_as_int = r33       // Get n directly if long int 64 bits
-(p8) sxt4          GR_N_as_int = r33       // Sign extend n if long int 32 bits
-     nop.i         0
-}
-;;
-
-//
-//   Normalize x
-//   Branch and return special values.
-//   Create -35000
-//   Create 35000
-//
+//   Sign extend N if long int is 32 bits
 {    .mfi
-     addl          GR_Big = 35000,r0
-     nop.f         0
-     add           GR_N_Biased = GR_Bias,GR_N_as_int
+(p9) mov           GR_N_as_int = r33     // Copy N if long int is 64 bits
+     fclass.m      p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+(p8) sxt4          GR_N_as_int = r33     // Sign extend N if long int is 32 bits
 }
-{    .mfb
-     addl          GR_NBig = -35000,r0
-(p7) fma.s.s0      FR_Result = FR_Floating_X,f1, f0 
-(p7) br.ret.spnt   b0  
-};;
+{ .mfi
+     mov           GR_NBig = -35000    // If N this small then certain underflow
+     nop.f         0
+     mov           GR_max_exp = 0x1007e      // Exponent of maximum float
+}
+;;

-//
-//   Build the exponent Bias
-//   Return x when N = 0
-//
+//   Create biased exponent for 2**N
+{    .mfi
+     add           GR_N_Biased = GR_Bias,GR_N_as_int
+     nop.f         0
+     cmp.ge        p7, p0 = GR_N_as_int, GR_Big  // Certain overflow?
+}
+{    .mib
+     cmp.le        p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
+     mov           GR_min_exp = 0x0ff81      // Exponent of minimum float
+(p9) br.cond.spnt  SCALBNF_UNORM              // Branch if x=unorm
+}
+;;
+
+SCALBNF_COMMON:
+// Main path continues.  Also return here from x=unorm path.
+//   Create 2**N
+.pred.rel "mutex",p7,p8
 {    .mfi
     setf.exp      FR_Two_N = GR_N_Biased
     nop.f         0
-     addl          GR_Scratch1  = 0x063BF,r0 
+(p7) mov           GR_N_as_int = GR_Big      // Limit max N
+}
+{    .mfi
+(p8) mov           GR_N_as_int = GR_NBig     // Limit min N
+     nop.f         0
+(p8) cmp.eq        p7,p0 = r0,r0             // Set p7 if |N| big
+}
+;;
+
+//
+//   Create biased exponent for 2**N for N big
+//   Is N zero?
+//
+{    .mfi
+(p7) add           GR_N_Biased = GR_Bias,GR_N_as_int
+     nop.f         0
+     cmp.eq.or     p6,p0 = r33,r0
+}
+{    .mfi
+     mov           GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
+     nop.f         0
+     mov           GR_exp_mask = 0x1ffff     // Exponent mask
+}
+;;
+
+//
+//   Create 2**N for N big
+//   Return x when N = 0 or X = NaN, Inf, Zero
+//
+{    .mfi
+(p7) setf.exp      FR_Two_N = GR_N_Biased
+     nop.f         0
+     mov           GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
 }
 {    .mfb
-     addl          GR_Scratch  = 0x019C3F,r0 
+     and           GR_exp_X = GR_exp_mask, GR_signexp_X
 (p6) fma.s.s0      FR_Result = FR_Floating_X, f1, f0
 (p6) br.ret.spnt   b0
-};;
+}
+;;

 //
-//   Create 2*big
-//   Create 2**-big 
-//   Is N > 35000     
-//   Is N < -35000     
 //   Raise Denormal operand flag with compare
-//   Main path, create 2**N
+//   Compute biased result exponent
 //
 {    .mfi
-     setf.exp      FR_NBig = GR_Scratch1                  
-     nop.f         0
-     cmp.ge.unc    p6, p0 = GR_N_as_int, GR_Big
-}
-{    .mfi
-     setf.exp      FR_Big = GR_Scratch                  
+     add           GR_exp_Result = GR_exp_X, GR_N_as_int
     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
-     cmp.le.unc    p8, p0 = GR_N_as_int, GR_NBig
-};;
+     mov           GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
+}
+;;

 //
-//   Adjust 2**N if N was very small or very large
+//   Do final operation
 //
 {    .mfi
-     nop.m 0
-(p6) fma.s1        FR_Two_N = FR_Big,f1,f0
-     nop.i 0
+     cmp.lt        p7,p6 = GR_exp_Result, GR_max_exp  // Test no overflow
+     fma.s.s0      FR_Result = FR_Two_N,FR_Norm_X,f0
+     cmp.lt        p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
 }
-{ .mlx
-     nop.m 999
-     movl          GR_Scratch = 0x000000000003007F 
-};;
-
-
-{    .mfi
+{    .mfb
     nop.m         0
-(p8) fma.s1        FR_Two_N = FR_NBig,f1,f0
-     nop.i 0
+     nop.f         0
+(p9) br.cond.spnt  SCALBNF_UNDERFLOW           // Branch if certain underflow
 }
-{    .mlx
-     nop.m 999
-     movl          GR_Scratch1= 0x000000000001007F 
-};;
+;;
+
+{    .mib
+(p6) cmp.gt.unc    p6,p8 = GR_exp_Result, GR_max_exp  // Test sure overflow
+(p7) cmp.ge.unc    p7,p9 = GR_exp_Result, GR_min_exp  // Test no over/underflow
+(p7) br.ret.sptk   b0                         // Return from main path
+}
+;;
+
+{    .bbb
+(p6) br.cond.spnt  SCALBNF_OVERFLOW            // Branch if certain overflow
+(p8) br.cond.spnt  SCALBNF_POSSIBLE_OVERFLOW   // Branch if possible overflow
+(p9) br.cond.spnt  SCALBNF_POSSIBLE_UNDERFLOW  // Branch if possible underflow
+}
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
+SCALBNF_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x1007e = exp_Result
+SCALBNF_POSSIBLE_OVERFLOW:

 //   Set up necessary status fields
 //
@ -200,34 +250,31 @@ GLOBAL_LIBM_ENTRY(__libm_scalblnf)
 //   S3 user supplied status + FZ + TD   (Underflows)
 //
 {    .mfi
-     nop.m 999
+     nop.m         0
     fsetc.s3      0x7F,0x41
-     nop.i 999
+     nop.i         0
 }
 {    .mfi
-     nop.m 999
+     nop.m         0
     fsetc.s2      0x7F,0x42
-     nop.i 999
-};;
+     nop.i         0
+}
+;;

 //
-//   Do final operation
+//   Do final operation with s2 and s3
 //
 {    .mfi
-     setf.exp      FR_NBig = GR_Scratch
-     fma.s.s0      FR_Result = FR_Two_N,FR_Norm_X,f0 
-     nop.i         999
+     setf.exp      FR_NBig = GR_neg_ov_limit
+     fma.s.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0
+     nop.i         0
 }
 {    .mfi
-     nop.m         999
-     fma.s.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0 
-     nop.i         999
-};;
-{    .mfi
-     setf.exp      FR_Big = GR_Scratch1
+     setf.exp      FR_Big = GR_pos_ov_limit
     fma.s.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0
-     nop.i         999
-};;
+     nop.i         0
+}
+;;

 //   Check for overflow or underflow.
 //   Restore s3
@ -236,70 +283,91 @@ GLOBAL_LIBM_ENTRY(__libm_scalblnf)
 {    .mfi
     nop.m         0
     fsetc.s3      0x7F,0x40
-     nop.i 999 
+     nop.i         0
 }
 {    .mfi
     nop.m         0
     fsetc.s2      0x7F,0x40
-     nop.i 999
-};;
+     nop.i         0
+}
+;;

 //
 //   Is the result zero?
 //
 {    .mfi
-     nop.m 999
-     fclass.m.unc  p6, p0 =  FR_Result3, 0x007
-     nop.i 999 
+     nop.m         0
+     fclass.m      p6, p0 =  FR_Result3, 0x007
+     nop.i         0
 }
 {    .mfi
-     addl          GR_Tag = 205, r0
-     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.m         0
+     fcmp.ge.s1    p7, p8 = FR_Result2 , FR_Big
     nop.i         0
-};;
+}
+;;

 //
 //   Detect masked underflow - Tiny + Inexact Only
 //
 {    .mfi
-     nop.m 999
+     nop.m         0
 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
-     nop.i 999 
-};; 
+     nop.i         0
+}
+;;

 //
 //   Is result bigger the allowed range?
 //   Branch out for underflow
 //
 {    .mfb
-(p6) addl           GR_Tag = 206, r0
+     nop.m          0
 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt   scalbnf_UNDERFLOW 
-};;
+(p6) br.cond.spnt   SCALBNF_UNDERFLOW
+}
+;;

 //
 //   Branch out for overflow
 //
-{ .mbb
-     nop.m 0
-(p7) br.cond.spnt   scalbnf_OVERFLOW 
-(p9) br.cond.spnt   scalbnf_OVERFLOW 
-};;
-
-//
-//   Return from main path.
-//
-{    .mfb
-     nop.m 999
-     nop.f 0
-     br.ret.sptk     b0;;                   
+{ .bbb
+(p7) br.cond.spnt   SCALBNF_OVERFLOW
+(p9) br.cond.spnt   SCALBNF_OVERFLOW
+     br.ret.sptk    b0             //   Return from main path.
 }
+;;
+
+// Here if result overflows
+SCALBNF_OVERFLOW:
+{ .mib
+     alloc         r32=ar.pfs,3,0,4,0
+     addl          GR_Tag = 205, r0    // Set error tag for overflow
+     br.cond.sptk  __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+SCALBNF_UNDERFLOW:
+{ .mib
+     alloc         r32=ar.pfs,3,0,4,0
+     addl          GR_Tag = 206, r0    // Set error tag for underflow
+     br.cond.sptk  __libm_error_region // Call error support for underflow
+}
+;;
+
+// Here if x=unorm
+SCALBNF_UNORM:
+{ .mib
+     getf.exp      GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+     nop.i         0
+     br.cond.sptk  SCALBNF_COMMON            // Return to main path
+}
+;;
+

 GLOBAL_LIBM_END(__libm_scalblnf)
-__libm_error_region:
-
-scalbnf_OVERFLOW: 
-scalbnf_UNDERFLOW: 
+LOCAL_LIBM_ENTRY(__libm_error_region)

 //
 // Get stack address of N
@ -352,9 +420,9 @@ scalbnf_UNDERFLOW:
 //  Get location of result on stack
 //
 { .mmi
-   nop.m 0
-   nop.m 0
   add   GR_Parameter_RESULT = 48,sp
+   nop.m 0
+   nop.i 0
 };;

 //
--- a/sysdeps/ia64/fpu/libm_sincos.S
+++ b/sysdeps/ia64/fpu/libm_sincos.S
@ -46,12 +46,13 @@
 // 03/19/02 Added stack unwind around call to __libm_cis_large
 // 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16)
 // 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/08/03 Improved performance
+// 02/11/04 cis was moved to a separate file.
 //
 // API
 //==============================================================
-// 1) double _Complex cis(double)
-// 2) void sincos(double, double*s, double*c)
-// 3) __libm_sincos - internal LIBM function, that accepts
+// 1) void sincos(double, double*s, double*c)
+// 2) __libm_sincos - internal LIBM function, that accepts
 //    argument in f8 and returns cosine through f8, sine through f9
 //
 // Overview of operation
@ -166,15 +167,14 @@
 // Registers used
 //==============================================================
 // general input registers:
-// r14 -> r19
-// r32 -> r49
+// r14 -> r39

 // predicate registers used:
 // p6 -> p14
-
+//
 // floating-point registers used
 // f9 -> f15
-// f32 -> f100
+// f32 -> f67

 // Assembly macros
 //==============================================================
@ -246,38 +246,32 @@ cis_Q                       = f67
 cis_pResSin                 = r33
 cis_pResCos                 = r34

-cis_exp_limit               = r35
-cis_r_signexp               = r36
-cis_AD_beta_table           = r37
-cis_r_sincos                = r38
-
-cis_r_exp                   = r39
-cis_r_17_ones               = r40
-
 cis_GR_sig_inv_pi_by_16     = r14
 cis_GR_rshf_2to61           = r15
 cis_GR_rshf                 = r16
 cis_GR_exp_2tom61           = r17
 cis_GR_n                    = r18
-
 cis_GR_n_sin                = r19
-cis_GR_m_sin                = r41
-cis_GR_32m_sin              = r41
+cis_exp_limit               = r20
+cis_r_signexp               = r21
+cis_AD_1                    = r22
+cis_r_sincos                = r23
+cis_r_exp                   = r24
+cis_r_17_ones               = r25
+cis_GR_m_sin                = r26
+cis_GR_32m_sin              = r26
+cis_GR_n_cos                = r27
+cis_GR_m_cos                = r28
+cis_GR_32m_cos              = r28
+cis_AD_2_sin                = r29
+cis_AD_2_cos                = r30
+cis_gr_tmp                  = r31

-cis_GR_n_cos                = r42
-cis_GR_m_cos                = r43
-cis_GR_32m_cos              = r43
-
-cis_AD_2_sin                = r44
-cis_AD_2_cos                = r45
-
-cis_gr_tmp                  = r46
-GR_SAVE_B0                  = r47
-GR_SAVE_GP                  = r48
-rB0_SAVED                   = r49
-GR_SAVE_PFS                 = r50
-GR_SAVE_PR                  = r51
-cis_AD_1                    = r52
+GR_SAVE_B0                  = r35
+GR_SAVE_GP                  = r36
+rB0_SAVED                   = r37
+GR_SAVE_PFS                 = r38
+GR_SAVE_PR                  = r39

 RODATA

@ -408,7 +402,7 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4)
 GLOBAL_IEEE754_ENTRY(sincos)
 // cis_GR_sig_inv_pi_by_16 = significand of 16/pi
 { .mlx
-      alloc         GR_SAVE_PFS             = ar.pfs, 0, 21, 0, 0
+      getf.exp      cis_r_signexp       = cis_Arg
      movl          cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A

 }
@ -430,12 +424,11 @@ GLOBAL_IEEE754_ENTRY(sincos)
      br.cond.sptk  _CIS_COMMON
 };;
 GLOBAL_IEEE754_END(sincos)
-LOCAL_LIBM_ENTRY(cis)
-LOCAL_LIBM_END(cis)
+
 GLOBAL_LIBM_ENTRY(__libm_sincos)
 // cis_GR_sig_inv_pi_by_16 = significand of 16/pi
 { .mlx
-      alloc         GR_SAVE_PFS             = ar.pfs,0,21,0,0
+      getf.exp      cis_r_signexp       = cis_Arg
      movl          cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
 }
 // cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
@ -443,6 +436,7 @@ GLOBAL_LIBM_ENTRY(__libm_sincos)
      addl          cis_AD_1            = @ltoff(double_cis_pi), gp
      movl          cis_GR_rshf_2to61   = 0x47b8000000000000
 };;
+
 // p14 set for __libm_sincos and cis
 { .mfi
      ld8           cis_AD_1            = [cis_AD_1]
@ -476,10 +470,15 @@ _CIS_COMMON:
 //  2^-61 for scaling Nfloat
 //  0x1001a is register_bias + 27.
 //  So if f8 >= 2^27, go to large arguments routine
-{ .mmi
-      getf.exp      cis_r_signexp       = cis_Arg
-      setf.exp      cis_2TOM61          = cis_GR_exp_2tom61
+{ .mfi
+      alloc         GR_SAVE_PFS         = ar.pfs, 3, 5, 0, 0
+      fclass.m      p11,p0              = cis_Arg, 0x0b // Test for x=unorm
      mov           cis_exp_limit       = 0x1001a
+}
+{ .mib
+      setf.exp      cis_2TOM61          = cis_GR_exp_2tom61
+      nop.i         0
+(p6)  br.cond.spnt  _CIS_SPECIAL_ARGS
 };;

 //  Load the two pieces of pi/16
@ -488,9 +487,11 @@ _CIS_COMMON:
 { .mmb
      ldfe          cis_Pi_by_16_hi     = [cis_AD_1],16
      setf.d        cis_RSHF            = cis_GR_rshf
-(p6)  br.cond.spnt  _CIS_SPECIAL_ARGS
+(p11) br.cond.spnt  _CIS_UNORM          // Branch if x=unorm
 };;

+_CIS_COMMON2:
+// Return here if x=unorm
 // Create constant inexact set
 { .mmi
      ldfe          cis_Pi_by_16_lo     = [cis_AD_1],16
@ -498,23 +499,18 @@ _CIS_COMMON:
      nop.i         0
 };;

+// Select exponent (17 lsb)
 { .mfi
      ldfe          cis_Pi_by_16_lowest = [cis_AD_1],16
      nop.f         0
-      nop.i         0
+      dep.z         cis_r_exp           = cis_r_signexp, 0, 17
 };;

 // Start loading P, Q coefficients
-{ .mib
-      ldfpd         cis_P4,cis_Q4       = [cis_AD_1],16
-      dep.z         cis_r_exp           = cis_r_signexp, 0, 17
-      nop.b         0
-};;
-
 // p10 is true if we must call routines to handle larger arguments
 // p10 is true if f8 exp is > 0x1001a
 { .mmb
-      ldfpd         cis_P3,cis_Q3       = [cis_AD_1],16
+      ldfpd         cis_P4,cis_Q4       = [cis_AD_1],16
      cmp.ge        p10, p0             = cis_r_exp, cis_exp_limit
 (p10) br.cond.spnt  _CIS_LARGE_ARGS // go to |x| >= 2^27 path
 };;
@ -523,39 +519,33 @@ _CIS_COMMON:
 // Multiply x by scaled 16/pi and add large const to shift integer part of W to
 // rightmost bits of significand
 { .mfi
-      ldfpd  cis_P2,cis_Q2   = [cis_AD_1],16
+      ldfpd         cis_P3,cis_Q3       = [cis_AD_1],16
      fma.s1 cis_W_2TO61_RSH = cis_NORM_f8,cis_SIG_INV_PI_BY_16_2TO61,cis_RSHF_2TO61
      nop.i  0
 };;

+// get N = (int)cis_int_Nfloat
 // cis_NFLOAT = Round_Int_Nearest(cis_W)
+{ .mmf
+      getf.sig      cis_GR_n            = cis_W_2TO61_RSH
+      ldfpd  cis_P2,cis_Q2   = [cis_AD_1],16
+      fms.s1        cis_NFLOAT          = cis_W_2TO61_RSH,cis_2TOM61,cis_RSHF
+};;
+
+// cis_r = -cis_Nfloat * cis_Pi_by_16_hi + x
 { .mfi
      ldfpd         cis_P1,cis_Q1       = [cis_AD_1], 16
-      fms.s1        cis_NFLOAT          = cis_W_2TO61_RSH,cis_2TOM61,cis_RSHF
-      nop.i         0
-};;
-
-// get N = (int)cis_int_Nfloat
-{ .mfi
-      getf.sig      cis_GR_n            = cis_W_2TO61_RSH 
-      nop.f         0
-      nop.i         0
-};;
-
-// Add 2^(k-1) (which is in cis_r_sincos) to N
-// cis_r = -cis_Nfloat * cis_Pi_by_16_hi + x
-// cis_r =  cis_r -cis_Nfloat * cis_Pi_by_16_lo
-{ .mfi
-      add           cis_GR_n_cos        = 0x8, cis_GR_n
      fnma.s1       cis_r               = cis_NFLOAT,cis_Pi_by_16_hi,cis_NORM_f8
      nop.i         0
 };;

-//Get M (least k+1 bits of N)
+// Add 2^(k-1) (the immediate 0x8) to N to get cis_GR_n_cos
 { .mmi
+      add           cis_GR_n_cos        = 0x8, cis_GR_n
+;;
+//Get M (least k+1 bits of N)
      and           cis_GR_m_sin        = 0x1f,cis_GR_n
      and           cis_GR_m_cos        = 0x1f,cis_GR_n_cos
-      nop.i         0
 };;

 { .mmi
@ -565,9 +555,10 @@ _CIS_COMMON:
 };;

 // Add 32*M to address of sin_cos_beta table
-{ .mmi
+// cis_r =  cis_r -cis_Nfloat * cis_Pi_by_16_lo
+{ .mfi
      add           cis_AD_2_sin        = cis_GR_32m_sin, cis_AD_1
-      nop.m         0
+      fnma.s1       cis_r               = cis_NFLOAT, cis_Pi_by_16_lo,  cis_r
      shl           cis_GR_32m_cos      = cis_GR_m_cos,5
 };;

@ -580,7 +571,6 @@ _CIS_COMMON:

 { .mfi
      ldfe          cis_Sm_cos          = [cis_AD_2_cos], 16
-      fnma.s1       cis_r               = cis_NFLOAT, cis_Pi_by_16_lo,  cis_r
      nop.i         0
 };;

@ -636,6 +626,12 @@ _CIS_COMMON:
      nop.i         0
 };;

+{ .mfi
+      nop.m         0
+      fmpy.s1       cis_rcub            = cis_r_exact, cis_rsq // get r^3
+      nop.i         0
+};;
+
 { .mfi
      nop.m         0
      fma.s1        cis_Q               = cis_rsq, cis_Q_temp2, cis_Q1
@ -647,12 +643,6 @@ _CIS_COMMON:
      nop.i         0
 };;

-{ .mfi
-      nop.m         0
-      fmpy.s1       cis_rcub            = cis_r_exact, cis_rsq // get r^3
-      nop.i         0
-};;
-
 { .mfi
      nop.m         0
      fma.s1        cis_Q_sin           = cis_srsq_sin,cis_Q, cis_Sm_sin
@ -717,7 +707,17 @@ _CIS_SPECIAL_ARGS:
      stfd          [cis_pResCos]       = cis_Cos_res
      br.ret.sptk   b0 // common exit for sincos main path
 };;
+
+_CIS_UNORM:
+// Here if x=unorm
+{ .mfb
+      getf.exp      cis_r_signexp       = cis_NORM_f8 // Get signexp of x
+      fcmp.eq.s0    p11,p0              = cis_Arg, f0 // Dummy op to set denorm
+      br.cond.sptk  _CIS_COMMON2        // Return to main path
+};;
+
 GLOBAL_LIBM_END(__libm_sincos)
+
 ////  |x| > 2^27 path  ///////
 .proc _CIS_LARGE_ARGS
 _CIS_LARGE_ARGS:
--- a/sysdeps/ia64/fpu/libm_sincos_large.S
+++ b/sysdeps/ia64/fpu/libm_sincos_large.S
@ -792,6 +792,7 @@ GLOBAL_LIBM_END(__libm_sincos_large)



+
 GLOBAL_LIBM_ENTRY(__libm_sin_large)

 { .mlx
@ -821,6 +822,7 @@ alloc GR_Table_Base = ar.pfs,0,12,2,0
 }

 GLOBAL_LIBM_END(__libm_sin_large)
+
 GLOBAL_LIBM_ENTRY(__libm_cos_large)

 { .mlx
@ -2673,6 +2675,7 @@ SINCOS_SPECIAL:
 }
 GLOBAL_LIBM_END(__libm_cos_large)

+
 // *******************************************************************
 // *******************************************************************
 // *******************************************************************
--- a/sysdeps/ia64/fpu/libm_sincosf.S
+++ b/sysdeps/ia64/fpu/libm_sincosf.S
@ -47,12 +47,12 @@
 // 03/19/02 Added stack unwind around call to __libm_cisf_large
 // 09/05/02 Work range is widened by reduction strengthen (2 parts of Pi/16)
 // 02/10/03 Reordered header: .section, .global, .proc, .align
+// 02/11/04 cisf was moved to a separate file.

 // API
 //==============================================================
-// 1) float _Complex cisf(float)
-// 2) void sincosf(float, float*s, float*c)
-// 3) __libm_sincosf - internal LIBM function, that accepts
+// 1) void sincosf(float, float*s, float*c)
+// 2) __libm_sincosf - internal LIBM function, that accepts
 //    argument in f8 and returns cosine through f8, sine through f9

 //
@ -420,8 +420,7 @@ GLOBAL_IEEE754_ENTRY(sincosf)
      br.cond.sptk  _CISF_COMMON
 };;
 GLOBAL_IEEE754_END(sincosf)
-LOCAL_LIBM_ENTRY(cisf)
-LOCAL_LIBM_END(cisf)
+
 GLOBAL_LIBM_ENTRY(__libm_sincosf)
 { .mlx
 // cisf_GR_sig_inv_pi_by_16 = significand of 16/pi
@ -679,6 +678,7 @@ _CISF_RETURN:
      br.ret.sptk   b0 // exit for sincos
 };;
 GLOBAL_LIBM_END(__libm_sincosf)
+
 ////  |x| > 2^24 path  ///////
 .proc _CISF_LARGE_ARGS
 _CISF_LARGE_ARGS:
--- a/sysdeps/ia64/fpu/libm_sincosl.S
+++ b/sysdeps/ia64/fpu/libm_sincosl.S
@ -1,7 +1,7 @@
-.file "libm_sincosl.asm"
+.file "libm_sincosl.s"


-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -43,6 +43,9 @@
 // 05/13/02 Initial version of sincosl (based on libm's sinl and cosl)
 // 02/10/03 Reordered header: .section, .global, .proc, .align;
 //          used data8 for long double table values
+// 10/13/03 Corrected .file name
+// 02/11/04 cisl was moved to a separate file.
+// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader
 //
 //*********************************************************************
 //
@ -50,9 +53,8 @@
 //
 // API's
 //==============================================================
-// 1) long double _Complex cisl(long double)
-// 2) void sincosl(long double, long double*s, long double*c)
-// 3) __libm_sincosl - internal LIBM function, that accepts
+// 1) void sincosl(long double, long double*s, long double*c)
+// 2) __libm_sincosl - internal LIBM function, that accepts
 //    argument in f8 and returns cosine through f8, sine through f9
 //
 //
@ -65,7 +67,7 @@
 //                              f32-f121
 //
 //    General Purpose Registers:
-//      r32-r47
+//      r32-r61
 //
 //    Predicate Registers:      p6-p15
 //
@ -775,20 +777,6 @@ FR_Tmp            = f94
 sincos_pResSin = r34
 sincos_pResCos = r35

-GR_sig_inv_pi  = r14
-GR_rshf_2to64  = r15
-GR_exp_2tom64  = r16
-GR_rshf        = r17
-GR_ad_p        = r18
-GR_ad_d        = r19
-GR_ad_pp       = r20
-GR_ad_qq       = r21
-GR_ad_c        = r22
-GR_ad_s        = r23
-GR_ad_ce       = r24
-GR_ad_se       = r25
-GR_ad_m14      = r26
-GR_ad_s1       = r27
 GR_exp_m2_to_m3= r36
 GR_N_Inc       = r37
 GR_Cis         = r38
@ -803,6 +791,20 @@ GR_N_SignS     = r45
 GR_N_SignC     = r46
 GR_N_SinCos    = r47

+GR_sig_inv_pi  = r48
+GR_rshf_2to64  = r49
+GR_exp_2tom64  = r50
+GR_rshf        = r51
+GR_ad_p        = r52
+GR_ad_d        = r53
+GR_ad_pp       = r54
+GR_ad_qq       = r55
+GR_ad_c        = r56
+GR_ad_s        = r57
+GR_ad_ce       = r58
+GR_ad_se       = r59
+GR_ad_m14      = r60
+GR_ad_s1       = r61

 // For unwind support
 GR_SAVE_B0     = r39
@ -814,7 +816,7 @@ GR_SAVE_PFS    = r41

 GLOBAL_IEEE754_ENTRY(sincosl)
 { .mlx  ///////////////////////////// 1 /////////////////
-      alloc r32 = ar.pfs,3,13,2,0
+      alloc r32 = ar.pfs,3,27,2,0
      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
 }
 { .mlx
@ -834,11 +836,9 @@ GLOBAL_IEEE754_ENTRY(sincosl)
 };;
 GLOBAL_IEEE754_END(sincosl)

-LOCAL_LIBM_ENTRY(cisl)
-LOCAL_LIBM_END(cisl)
 GLOBAL_LIBM_ENTRY(__libm_sincosl)
 { .mlx  ///////////////////////////// 1 /////////////////
-      alloc r32 = ar.pfs,3,14,2,0
+      alloc r32 = ar.pfs,3,27,2,0
      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
 }
 { .mlx
@ -2447,6 +2447,7 @@ SINCOSL_SPECIAL:

 GLOBAL_LIBM_END(__libm_sincosl)

+
 // *******************************************************************
 // *******************************************************************
 // *******************************************************************
@ -2461,7 +2462,7 @@ GLOBAL_LIBM_END(__libm_sincosl)
 //         c is in f9
 //         N is in r8
 //     Be sure to allocate at least 2 GP registers as output registers for
-//     __libm_pi_by_2_reduce.  This routine uses r49-50. These are used as
+//     __libm_pi_by_2_reduce.  This routine uses r62-63. These are used as
 //     scratch registers within the __libm_pi_by_2_reduce routine (for speed).
 //
 //     We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127.  We
--- a/sysdeps/ia64/fpu/libm_support.h
+++ b/sysdeps/ia64/fpu/libm_support.h
--- a/sysdeps/ia64/fpu/s_asinh.S
+++ b/sysdeps/ia64/fpu/s_asinh.S
@ -1134,3 +1134,4 @@ ASINH_UNORM:
 ;;

 GLOBAL_LIBM_END(asinh)
+
--- a/sysdeps/ia64/fpu/s_asinhl.S
+++ b/sysdeps/ia64/fpu/s_asinhl.S
@ -1344,3 +1344,4 @@ near_0:
 GLOBAL_LIBM_END(asinhl)


+
--- a/sysdeps/ia64/fpu/s_atanf.S
+++ b/sysdeps/ia64/fpu/s_atanf.S
@ -553,3 +553,4 @@ ATANF_X_INF_NAN_ZERO:
 ;;

 GLOBAL_LIBM_END(atanf)
+
--- a/sysdeps/ia64/fpu/s_atanl.S
+++ b/sysdeps/ia64/fpu/s_atanl.S
@ -812,6 +812,7 @@ GLOBAL_IEEE754_ENTRY(atanl)
 ;;

 GLOBAL_IEEE754_END(atanl)
+
 GLOBAL_IEEE754_ENTRY(atan2l)

 { .mfi
@ -1951,6 +1952,7 @@ ATANL_ArgY_Not_INF:
 ;;

 GLOBAL_IEEE754_END(atan2l)
+ 
 LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
--- a/sysdeps/ia64/fpu/s_cbrtf.S
+++ b/sysdeps/ia64/fpu/s_cbrtf.S
@ -762,3 +762,4 @@ GLOBAL_LIBM_END(cbrtf)



+
--- a/sysdeps/ia64/fpu/s_cbrtl.S
+++ b/sysdeps/ia64/fpu/s_cbrtl.S
@ -1,7 +1,7 @@
 .file "cbrtl.s"


-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -42,6 +42,7 @@
 // 04/28/00 Initial version
 // 05/20/02 Cleaned up namespace and sf0 syntax
 // 02/06/03 Reordered header:.section,.global,.proc,.align
+// 11/23/04 Reformatted routine and improved speed
 //
 // API
 //==============================================================
@ -53,49 +54,93 @@
 //
 // Implementation
 //
-//   cbrt(a) = cbrt(a y) / cbrt(y)
-//          = cbrt(1 - (1 - a y)) * 1/cbrt(y)
+// The result is computed as
+// cbrt(x)= cbrt(1 - (1 - x*y)) * (1/cbrt(y))
+// where y = frcpa(x) = (-1)^sgn_y * 2^(3*k+j) * m_y,  
+//       m_y in [1,2),  j in {0,1,2}
 //
-// where y = frcpa(a). 
+//  cbrt(1 - (1 - x*y)) is approximated by a degree-6 polynomial
+//  in r= 1 - x*y :
+// P = 1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6
 //
-//  * cbrt(1 - (1 - a y)) is approximated by a degree-6 polynomial 
 //
-//  1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6
-// 
-//  in r = 1 - a y.
-//
-//  * The values 1/cbrt(y) are stored as two tables of constants T_hi
+// The values (1/cbrt(y)) are stored as two tables of constants T_hi
 // (double-extended precision) and D (single precision) as follows:
-//
 // T_hi (1 + D)= 1/cbrt(y) to about 80 bits of accuracy
 //
-// The tables are only stored for three exponent values and are
-// then multiplied by e/3 where e is the exponent of the input number.
-// This computation is carried out in parallel with the polynomial
-// evaluation:
+// The tables are only stored for three exponent values (i.e. 
+// only for 2^j * m_y, where j in {0,1,2} and m_y covers the 256
+// possible mantissas for an frcpa result); the index is formed
+// by the 8 leading mantissa bits of x, which is the same index used
+// by the hardware to get frcpa(x).
 //
-//      T = 2^(e/3) * T_hi
-
-
-
-
+// The table values are multiplied by 2^floor(e/3), where e is the exponent
+// of the input number.  This multiplication is carried out in parallel with
+// the polynomial evaluation:
+// T = 2^floor(e/3) * T_hi
+//
+//=======================================================================

 //===============
-// input = x
-// C = frcpa(x)
-// r = C * x - 1
-//
 // Special values
 //==============================================================

-
-
 // Registers used
 //==============================================================
-//   f6-f15
-//   r2-r3, r23-r30
 // p6, p7, p12
+           FR_R       =  f6
+           FR_C1      =  f7
+           FR_C2      =  f9
+           FR_C3      =  f10
+           FR_C4      =  f11
+           FR_C5      =  f12
+           FR_C6      =  f13
+           FR_XNORM   =  f14
+           FR_D       =  f15
+           FR_SPECIAL =  f32
+           FR_RCP     =  f33
+           FR_R2      =  f34
+           FR_P1      =  f35
+           FR_P2      =  f36
+           FR_P3      =  f37
+           FR_P4      =  f38
+           FR_P5      =  f39
+           FR_R3      =  f40
+           FR_T       =  f41
+           FR_TF      =  f42
+           FR_P       =  f43
+           FR_SGNEXP  =  f44

+           GR_ADDR       = r2
+           GR_C_START    = r2
+           GR_ARGSIG     = r3
+           GR_NORMSIG    = r15
+           GR_D_ADDR     = r16
+           GR_D_START    = r16
+           GR_INDEX2     = r17
+           GR_IX2        = r17
+           GR_NORMEXP    = r18
+           GR_EXP5       = r19
+           GR_EXP3       = r20
+           GR_EXP6       = r20
+           GR_EXP17      = r21
+           GR_TMP1       = r21
+           GR_SGNMASK    = r22
+           GR_T_INDEX    = r23
+           GR_IX_T       = r23  
+           GR_IX_D       = r24
+           GR_D_INDEX    = r24
+           GR_TMP2       = r25
+           GR_TMP3       = r25
+           GR_TMP4       = r25
+           GR_EXP_RES    = r26
+           GR_BIAS23     = r27
+           GR_EXPBIAS    = r27
+           GR_EXP_MOD_3  = r28
+           GR_SIGN       = r29
+           GR_EXPSIGNRES = r29
+           GR_REMTMP     = r30
+           GR_NORMEXPSGN = r31


 // Data tables
@ -116,7 +161,6 @@ LOCAL_OBJECT_END(poly_coeffs)

 LOCAL_OBJECT_START(T_table)

-
       data8 0x80155c748c374836, 0x8040404b0879f7f9
       data8 0x806b5dce4b405c10, 0x8096b586974669b1
       data8 0x80bcd273d952a028, 0x80e898c52813f2f3
@ -504,10 +548,6 @@ data8 0xffc01fed60f86fb5, 0xffeaae3832b63956
 LOCAL_OBJECT_END(T_table)


-
-
-
-
 LOCAL_OBJECT_START(D_table)

       data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c
@ -709,184 +749,238 @@ LOCAL_OBJECT_END(D_table)
 GLOBAL_LIBM_ENTRY(cbrtl)

 { .mfi
-  getf.sig r3=f8
-     // will continue only for normal/denormal numbers          
+       getf.sig GR_ARGSIG = f8
+       // will continue on main path only for normal/denormal numbers
+       // all other values will be filtered out and will exit early
       fclass.nm.unc p12, p7 = f8, 0x1b
-  // r2 = pointer to C_1...C_6 followed by T_table
-  addl r2 = @ltoff(poly_coeffs), gp;;
+       // GR_ADDR = pointer to C_1...C_6 followed by T_table
+       addl GR_ADDR = @ltoff(poly_coeffs), gp
 }
 { .mfi
-  // r29=2/3*bias -63=0xaaaa-0x3f=0xaa6b
-  mov r29=0xaa6b    
+       // GR_BIAS23 = 2/3*bias -63 = 0xaaaa-0x3f = 0xaa6b
+       mov GR_BIAS23 = 0xaa6b
       // normalize a
-  fma.s1 f14=f8,f1,f0
-  // r27 = pointer to D table
-  addl r27 = @ltoff(D_table), gp;;
-}
-{.mib
-  nop.m 0
-  (p7) cmp.eq p12,p0=r3,r0
-  nop.b 0;;
-}
-{.mfb
-  // load start address for C_1...C_6 followed by T_table
-  ld8 r2=[r2]
-  (p12) fma.s0 f8=f8,f1,f0
-  (p12) br.ret.spnt b0;;
+       fma.s1 FR_XNORM = f8, f1, f0
+       // GR_D_ADDR = pointer to D table
+       addl GR_D_ADDR = @ltoff(D_table), gp
 }
+;;
+
 { .mmf
-  // load C_1
-  ldfe f7=[r2],16
+       // load start address for C_1...C_6 followed by T_table
+       ld8 GR_C_START = [ GR_ADDR ]
       // load start address of D table
-  ld8 r27=[r27]
+       ld8 GR_D_START = [ GR_D_ADDR ]
       // y = frcpa(a)
-  frcpa.s0 f8,p6=f1,f8;;
-}
-{.mmi
-  // load C_2
-  ldfe f9=[r2],16;;
-   // load C_3, C_4
-  ldfpd f10,f11=[r2],16
-  nop.i 0;;
+       frcpa.s1 FR_RCP, p6 = f1, f8
 }
+;;
+
 { .mmi
        // get normalized significand
-  getf.sig r23=f14
+       getf.sig GR_NORMSIG = FR_XNORM
        // get exponent
-  getf.exp r24=f14
-  mov r25=0x20000;;
+       getf.exp GR_NORMEXPSGN = FR_XNORM
+ (p7)  cmp.eq p12, p0 = GR_ARGSIG, r0
 }
+;;
+
 { .mii
-  // get r26=sign
-  and r26=r24,r25
-  // eliminate leading 1 from r23=2nd table index
-  shl r23=r23,1
-  // eliminate sign from exponent (r25)
-  andcm r25=r24,r25;;
+       // load C_1
+       ldfe FR_C1 = [ GR_C_START ], 16
+       mov GR_SGNMASK = 0x20000
+       nop.i 0
 }
+;;
+
+{ .mfb
+       // load C_2
+       ldfe FR_C2 = [ GR_C_START ], 16
+ (p12) fma.s0 f8 = f8, f1, f0
+       // NaN/Infinities exit early
+ (p12) br.ret.spnt b0
+}
+;;
+
+{ .mfi
+       // load C_3, C_4
+       ldfpd FR_C3, FR_C4 = [ GR_C_START ], 16
+       // y = frcpa(a), set flags and result when argument is 0
+       // only used when p6=0
+       frcpa.s0 f8, p0 = f1, f8
+       nop.i 0
+}
+;;
+
+{ .mii
+       // get GR_SIGN = sign
+       and GR_SIGN = GR_NORMEXPSGN, GR_SGNMASK
+       // eliminate leading 1 from GR_NORMSIG = 2nd table index
+       shl GR_INDEX2 = GR_NORMSIG, 1
+       // eliminate sign from exponent 
+       andcm GR_NORMEXP = GR_NORMEXPSGN, GR_SGNMASK
+}
+;;
+
 { .mfi
       // load C_5, C_6
-  (p6) ldfpd f12,f13=[r2],16
+ (p6)  ldfpd FR_C5, FR_C6 = [ GR_C_START ], 16
       // r = 1-a*y
-  (p6) fnma.s1 f6=f8,f14,f1
-  // 1: exponent*=5;  // (2^{16}-1)/3=0x5555
-  shladd r24=r25,2,r25;;
+ (p6)  fnma.s1 FR_R = FR_RCP, FR_XNORM, f1
+       // Start computation of floor(exponent/3) by
+       // computing (2^20+2)/3*exponent = exponent*0x55556
+       // 1: exponent* = 5; 
+       // (2^{16}-1)/3 = 0x5555: 
+       // will form 0x5555*exponent by using shladd's
+       shladd GR_EXP5 = GR_NORMEXP, 2, GR_NORMEXP
 }
+;;
+
 { .mib
-  // r30=(5*expon)*16
-  shladd r30=r24,4,r0
-  // r28=3*exponent
-  shladd r28=r25,1,r25
-  nop.b 0;;
+       // Next several integer steps compute floor(exponent/3)
+       // GR_TMP1 = (5*expon)*16
+       shladd GR_TMP1 = GR_EXP5, 4, r0
+       // GR_EXP3 = 3*exponent
+       shladd GR_EXP3 = GR_NORMEXP, 1, GR_NORMEXP
+       nop.b 0
 }
+;;
+
 { .mmi
-  // r28=6*exponent
-  shladd r28=r28,1,r0
-  // r24=17*expon
-  add r24=r24,r30
-  // r23=2nd table index (8 bits)
-  shr.u r23=r23,56;;
+       // GR_EXP6 = 6*exponent
+       shladd GR_EXP6 = GR_EXP3, 1, r0
+       // GR_EXP17 = 17*expon
+       add GR_EXP17 = GR_EXP5, GR_TMP1
+       // GR_IX2 = 2nd table index (8 bits)
+       shr.u GR_IX2 = GR_INDEX2, 56
 }
+;;
+
 { .mmi
       // adjust T_table pointer by 2nd index
-  shladd r2=r23,3,r2
+       shladd GR_T_INDEX = GR_IX2, 3, GR_C_START
       // adjust D_table pointer by 2nd index
-  shladd r27=r23,2,r27
-  // r30=(17*expon)*16^2
-  shl r30=r24,8;;
+       shladd GR_D_INDEX = GR_IX2, 2, GR_D_START
+       // GR_TMP2 = (17*expon)*16^2
+       shl GR_TMP2 = GR_EXP17, 8
 }
+;;
+
 { .mmi
-  // r24=expon*(2^16-1)/3
-  add r24=r24,r30;;
-  // r24=expon*(2^20+2)/3=expon*0x55556
-  shladd r24=r24,4,r28
-  nop.i 0;;
+       // GR_TMP3 = expon*(2^16-1)/3
+       add GR_TMP3 = GR_EXP17, GR_TMP2
+;;
+       // GR_TMP4 = expon*(2^20+2)/3 = expon*0x55556
+       shladd GR_TMP4 = GR_TMP3, 4, GR_EXP6
+       nop.i 0
 }
+;;
+
 { .mii
       nop.m 0
-  // r24=floor(expon/3)
-  shr.u r24=r24,20
-  nop.i 0;;
+       // GR_EXP_RES = floor(expon/3)
+       shr.u GR_EXP_RES = GR_TMP4, 20
+       nop.i 0
 }
+;;
+
 { .mmi
       nop.m 0
-  // r28=3*exponent
-  shladd r28=r24,1,r24
+       // r16 = 3*exponent
+       shladd r16 = GR_EXP_RES, 1, GR_EXP_RES
       // bias exponent
-  add r24=r29,r24;;
+       add GR_EXPBIAS = GR_BIAS23, GR_EXP_RES
 }
+;;
+
 { .mmi
       // get remainder of exponent/3
-  sub r25=r25,r28;;
+       sub GR_EXP_MOD_3 = GR_NORMEXP, r16
+;;
       // add sign to exponent
-  or r24=r24,r26
+       or GR_EXPSIGNRES = GR_EXPBIAS, GR_SIGN
       // remainder << = 8
-  shl r25=r25,8;;
+       shl GR_REMTMP = GR_EXP_MOD_3, 8
 }
+;;
+
 { .mfi
       // adjust D_table pointer by 1st index
-  shladd r27=r25,2,r27
+       shladd GR_IX_D = GR_REMTMP, 2, GR_D_INDEX
       // P_1 = C_1+C_2*r
-  (p6) fma.s1 f7=f9,f6,f7
+ (p6)  fma.s1 FR_P1 = FR_C2, FR_R, FR_C1
       // adjust T_table pointer by 1st index
-  shladd r2=r25,3,r2
+       shladd GR_IX_T = GR_REMTMP, 3, GR_T_INDEX
 }
 { .mfi
-  // f14=sign*2^{exponent/3}
-  (p6) setf.exp f14=r24
-  // r2=r*r
-  (p6) fma.s1 f9=f6,f6,f0
-  nop.i 0;;
+       // FR_SGNEXP = sign*2^{exponent/3}
+ (p6)  setf.exp FR_SGNEXP = GR_EXPSIGNRES
+       // r^2 = r*r
+ (p6)  fma.s1 FR_R2 = FR_R, FR_R, f0
+       nop.i 0
 }
+;;
+
 { .mfi
       // load D
-  (p6) ldfs f15=[r27]
+ (p6)  ldfs FR_D = [ GR_IX_D ]
       // P_2 = C_3+C_4*r
-  (p6) fma.s1 f10=f11,f6,f10
+ (p6)  fma.s1 FR_P2 = FR_C4, FR_R, FR_C3
       nop.i 0
 }
 { .mfi
       // load T
-  (p6) ldf8 f8=[r2]
+ (p6)  ldf8 FR_T = [ GR_IX_T ]
       // P_3 = C_5+C_6*r
-  (p6) fma.s1 f12=f13,f6,f12
-  nop.i 0;;
+ (p6)  fma.s1 FR_P3 = FR_C6, FR_R, FR_C5
+       nop.i 0
 }
+;;
+
 { .mfi
       nop.m 0
       // P_4 = D-r*P_1
-  (p6) fnma.s1 f15=f6,f7,f15
+ (p6)  fnma.s1 FR_P4 = FR_R, FR_P1, FR_D
       nop.i 0
 }
 { .mfi
       nop.m 0
-  // r3=r*r2
-  (p6) fma.s1 f6=f6,f9,f0
-  nop.i 0;;
+       // r^3 = r*r^2
+ (p6)  fma.s1 FR_R3 = FR_R, FR_R2, f0
+       nop.i 0
 }
+;;
+
 { .mfi
       nop.m 0
       // P_5 = P_2+r2*P_3
-  (p6) fma.s1 f10=f9,f12,f10
-  nop.i 0;;
+ (p6)  fma.s1 FR_P5 = FR_R2, FR_P3, FR_P2
+       nop.i 0
 }
+;;
+
 { .mfi
       nop.m 0
       // T = T*(sign*2^{exponent/3})
-  (p6) fma.s1 f8=f8,f14,f0
+ (p6)  fma.s1 FR_TF = FR_T, FR_SGNEXP, f0
       nop.i 0
 }
 { .mfi
       nop.m 0
       // P = P_4-r3*P_5
-  (p6) fnma.s1 f6=f6,f10,f15
-  nop.i 0;;
+ (p6)  fnma.s1 FR_P = FR_R3, FR_P5, FR_P4
+       nop.i 0
 }
+;;
+
 { .mfb
       nop.m 0
       // result = T+T*p
-  (p6) fma.s0 f8=f8,f6,f8
-  br.ret.sptk b0;;
+ (p6)  fma.s0 f8 = FR_TF, FR_P, FR_TF
+       br.ret.sptk b0
 }
+;;
+
 GLOBAL_LIBM_END(cbrtl)

+
--- a/sysdeps/ia64/fpu/s_cos.S
+++ b/sysdeps/ia64/fpu/s_cos.S
@ -1,7 +1,7 @@
 .file "sincos.s"


-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -51,6 +51,8 @@
 // 06/03/02 Insure inexact flag set for large arg result
 // 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16)
 // 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/08/03 Improved performance
+// 10/28/04 Saved sincos_r_sincos to avoid clobber by dynamic loader 

 // API
 //==============================================================
@ -170,11 +172,11 @@
 // Registers used
 //==============================================================
 // general input registers:
-// r14 -> r19
-// r32 -> r45
+// r14 -> r26
+// r32 -> r36

 // predicate registers used:
-// p6 -> p14
+// p6 -> p11

 // floating-point registers used
 // f9 -> f15
@ -236,16 +238,6 @@ fp_tmp                         = f61

 /////////////////////////////////////////////////////////////

-sincos_AD_1                    = r33
-sincos_AD_2                    = r34
-sincos_exp_limit               = r35
-sincos_r_signexp               = r36
-sincos_AD_beta_table           = r37
-sincos_r_sincos                = r38
-
-sincos_r_exp                   = r39
-sincos_r_17_ones               = r40
-
 sincos_GR_sig_inv_pi_by_16     = r14
 sincos_GR_rshf_2to61           = r15
 sincos_GR_rshf                 = r16
@ -254,11 +246,18 @@ sincos_GR_n                    = r18
 sincos_GR_m                    = r19
 sincos_GR_32m                  = r19
 sincos_GR_all_ones             = r19
+sincos_AD_1                    = r20
+sincos_AD_2                    = r21
+sincos_exp_limit               = r22
+sincos_r_signexp               = r23
+sincos_r_17_ones               = r24
+sincos_r_sincos                = r25
+sincos_r_exp                   = r26

-gr_tmp                         = r41
-GR_SAVE_PFS                    = r41
-GR_SAVE_B0                     = r42
-GR_SAVE_GP                     = r43
+GR_SAVE_PFS                    = r33
+GR_SAVE_B0                     = r34
+GR_SAVE_GP                     = r35
+GR_SAVE_r_sincos               = r36


 RODATA
@ -405,7 +404,7 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4)
 GLOBAL_IEEE754_ENTRY(sin)

 { .mlx
-      alloc         r32                 = ar.pfs, 1, 13, 0, 0
+      getf.exp      sincos_r_signexp    = f8
      movl sincos_GR_sig_inv_pi_by_16   = 0xA2F9836E4E44152A // signd of 16/pi
 }
 { .mlx
@ -427,10 +426,11 @@ GLOBAL_IEEE754_ENTRY(sin)
 ;;

 GLOBAL_IEEE754_END(sin)
+
 GLOBAL_IEEE754_ENTRY(cos)

 { .mlx
-      alloc         r32                 = ar.pfs, 1, 13, 0, 0
+      getf.exp      sincos_r_signexp    = f8
      movl sincos_GR_sig_inv_pi_by_16   = 0xA2F9836E4E44152A // signd of 16/pi
 }
 { .mlx
@ -464,7 +464,6 @@ _SINCOS_COMMON:
 // Form two constants we need
 //  16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
 //  1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
-// fcmp used to set denormal, and invalid on snans
 { .mfi
      setf.sig      sincos_SIG_INV_PI_BY_16_2TO61 = sincos_GR_sig_inv_pi_by_16
      fclass.m      p6,p0                         = f8, 0xe7 // if x = 0,inf,nan
@ -480,10 +479,15 @@ _SINCOS_COMMON:
 //  2^-61 for scaling Nfloat
 // 0x1001a is register_bias + 27.
 // So if f8 >= 2^27, go to large argument routines
-{ .mmi
-      getf.exp      sincos_r_signexp    = f8
+{ .mfi
+      alloc         r32                 = ar.pfs, 1, 4, 0, 0
+      fclass.m      p11,p0              = f8, 0x0b // Test for x=unorm
+      mov           sincos_GR_all_ones  = -1 // Used to create the "inexact" constant
+}
+{ .mib
      setf.exp      sincos_2TOM61       = sincos_GR_exp_2tom61
-      addl          gr_tmp              = -1,r0 // For "inexect" constant create
+      nop.i         999
+(p6)  br.cond.spnt  _SINCOS_SPECIAL_ARGS
 }
 ;;

@ -493,41 +497,31 @@ _SINCOS_COMMON:
 { .mmb
      ldfe          sincos_Pi_by_16_1   = [sincos_AD_1],16
      setf.d        sincos_RSHF         = sincos_GR_rshf
-(p6)  br.cond.spnt  _SINCOS_SPECIAL_ARGS
+(p11) br.cond.spnt  _SINCOS_UNORM       // Branch if x=unorm
 }
 ;;

+_SINCOS_COMMON2:
+// Return here if x=unorm
+// Create constant used to set inexact
 { .mmi
      ldfe          sincos_Pi_by_16_2   = [sincos_AD_1],16
-      setf.sig      fp_tmp              = gr_tmp // constant for inexact set
-      nop.i         999
-};;
-
-{ .mfi
-      ldfe          sincos_Pi_by_16_3   = [sincos_AD_1],16
-      nop.f         999
-      nop.i         999
-};;
-
-// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
-{ .mmi
-      ldfpd         sincos_P4,sincos_Q4 = [sincos_AD_1],16
-      nop.m         999
+      setf.sig      fp_tmp              = sincos_GR_all_ones
      nop.i         999
 };;

 // Select exponent (17 lsb)
-{ .mmi
-      ldfpd         sincos_P3,sincos_Q3 = [sincos_AD_1],16
-      nop.m         999
+{ .mfi
+      ldfe          sincos_Pi_by_16_3   = [sincos_AD_1],16
+      nop.f         999
      dep.z         sincos_r_exp        = sincos_r_signexp, 0, 17 
-}
-;;
+};;

+// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
 // p10 is true if we must call routines to handle larger arguments
 // p10 is true if f8 exp is >= 0x1001a (2^27)
 { .mmb
-      ldfpd         sincos_P2,sincos_Q2 = [sincos_AD_1],16
+      ldfpd         sincos_P4,sincos_Q4 = [sincos_AD_1],16
      cmp.ge        p10,p0              = sincos_r_exp,sincos_exp_limit 
 (p10) br.cond.spnt  _SINCOS_LARGE_ARGS // Go to "large args" routine
 };;
@ -536,66 +530,61 @@ _SINCOS_COMMON:
 // Multiply x by scaled 16/pi and add large const to shift integer part of W to
 //   rightmost bits of significand
 { .mfi
-      ldfpd         sincos_P1,sincos_Q1 = [sincos_AD_1],16
+      ldfpd         sincos_P3,sincos_Q3 = [sincos_AD_1],16
      fma.s1 sincos_W_2TO61_RSH = sincos_NORM_f8,sincos_SIG_INV_PI_BY_16_2TO61,sincos_RSHF_2TO61
      nop.i         999
 };;

+// get N = (int)sincos_int_Nfloat
 // sincos_NFLOAT = Round_Int_Nearest(sincos_W)
 // This is done by scaling back by 2^-61 and subtracting the shift constant
-{ .mfi
-      nop.m         999
-      fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
-      nop.i         999 
-};;
-
-
-// get N = (int)sincos_int_Nfloat
-{ .mfi
+{ .mmf
      getf.sig      sincos_GR_n         = sincos_W_2TO61_RSH
-      nop.f         999
-      nop.i         999 
+      ldfpd         sincos_P2,sincos_Q2 = [sincos_AD_1],16
+      fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
 };;

-// Add 2^(k-1) (which is in sincos_r_sincos) to N
 // sincos_r          = -sincos_Nfloat * sincos_Pi_by_16_1 + x
 { .mfi
-      add           sincos_GR_n         = sincos_GR_n, sincos_r_sincos
+      ldfpd         sincos_P1,sincos_Q1 = [sincos_AD_1],16
      fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8
      nop.i         999 
 };;

-// Get M (least k+1 bits of N)
+// Add 2^(k-1) (which is in sincos_r_sincos) to N
 { .mmi
-      and           sincos_GR_m         = 0x1f,sincos_GR_n;;
+      add           sincos_GR_n         = sincos_GR_n, sincos_r_sincos
+;;
+// Get M (least k+1 bits of N)
+      and           sincos_GR_m         = 0x1f,sincos_GR_n
+      nop.i         999 
+};;
+
+// sincos_r          = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
+{ .mfi
      nop.m         999
+      fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2,  sincos_r
      shl           sincos_GR_32m       = sincos_GR_m,5
 };;

 // Add 32*M to address of sin_cos_beta table
+// For sin denorm. - set uflow
 { .mfi
      add           sincos_AD_2         = sincos_GR_32m, sincos_AD_1
-(p8)  fclass.m.unc  p10,p0              = f8,0x0b // For sin denorm. - set uflow
+(p8)  fclass.m.unc  p10,p0              = f8,0x0b
      nop.i         999 
 };;

 // Load Sin and Cos table value using obtained index m  (sincosf_AD_2)
 { .mfi
      ldfe          sincos_Sm           = [sincos_AD_2],16
-(p9)  fclass.m.unc  p11,p0              = f8,0x0b // For cos denorm - set denorm
-      nop.i         999 
-};;
-
-// sincos_r          = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
-{ .mfi
-      ldfe          sincos_Cm           = [sincos_AD_2]
-      fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2,  sincos_r
+      nop.f         999 
      nop.i         999 
 };;

 // get rsq = r*r
 { .mfi
-      nop.m         999
+      ldfe          sincos_Cm           = [sincos_AD_2]
      fma.s1        sincos_rsq          = sincos_r, sincos_r,   f0 // r^2 = r*r
      nop.i         999
 }
@ -660,7 +649,6 @@ _SINCOS_COMMON:
      fma.s1        sincos_Q            = sincos_rsq, sincos_Q_temp2, sincos_Q1
      nop.i         999
 }
-
 { .mfi
      nop.m         999
      fma.s1        sincos_P            = sincos_rsq, sincos_P_temp2, sincos_P1
@ -675,7 +663,6 @@ _SINCOS_COMMON:
      fma.s1        sincos_Q            = sincos_srsq,sincos_Q, sincos_Sm
      nop.i         999
 }
-
 { .mfi
      nop.m         999
      fma.s1        sincos_P            = sincos_rcub,sincos_P, sincos_r_exact
@ -683,19 +670,12 @@ _SINCOS_COMMON:
 };;

 // If sin(denormal), force underflow to be set
-.pred.rel "mutex",p10,p11
 { .mfi
      nop.m         999
-(p10) fmpy.d.s0     fp_tmp              = f8,f8  // forces underflow flag
-      nop.i         999                          // for denormal sine args
-}
-{ .mfi
-      nop.m         999
-(p11) fma.d.s0      fp_tmp              = f8,f1, f8  // forces denormal flag
-      nop.i         999                              // for denormal cosine args
+(p10) fmpy.d.s0     fp_tmp              = sincos_NORM_f8,sincos_NORM_f8
+      nop.i         999
 };;

-
 // Final calculation
 // result = C[m]*P + Q
 { .mfb
@ -724,13 +704,22 @@ _SINCOS_SPECIAL_ARGS:
      br.ret.sptk   b0 // Exit for x = 0/Inf/NaN path
 };;

+_SINCOS_UNORM:
+// Here if x=unorm
+{ .mfb
+      getf.exp      sincos_r_signexp    = sincos_NORM_f8 // Get signexp of x 
+      fcmp.eq.s0    p11,p0              = f8, f0  // Dummy op to set denorm flag
+      br.cond.sptk  _SINCOS_COMMON2     // Return to main path
+};;
+
 GLOBAL_IEEE754_END(cos)
+
 //////////// x >= 2^27 - large arguments routine call ////////////
 LOCAL_LIBM_ENTRY(__libm_callout_sincos)
 _SINCOS_LARGE_ARGS:
 .prologue
 { .mfi
-      mov           sincos_GR_all_ones  = -1 // 0xffffffff
+      mov           GR_SAVE_r_sincos    = sincos_r_sincos // Save sin or cos
      nop.f         999
 .save ar.pfs,GR_SAVE_PFS
      mov           GR_SAVE_PFS         = ar.pfs
@ -753,7 +742,7 @@ _SINCOS_LARGE_ARGS:
 };;

 { .mbb
-      cmp.ne        p9,p0               = sincos_r_sincos, r0 // set p9 if cos
+      cmp.ne        p9,p0               = GR_SAVE_r_sincos, r0 // set p9 if cos
      nop.b         999
 (p9)  br.call.sptk.many b0              = __libm_cos_large# // cos(large_X)
 };;
--- a/sysdeps/ia64/fpu/s_cosf.S
+++ b/sysdeps/ia64/fpu/s_cosf.S
@ -408,6 +408,7 @@ GLOBAL_IEEE754_ENTRY(sinf)
 };;

 GLOBAL_IEEE754_END(sinf)
+
 GLOBAL_IEEE754_ENTRY(cosf)

 { .mlx
@ -657,6 +658,7 @@ _SINCOSF_SPECIAL_ARGS:
 };;

 GLOBAL_IEEE754_END(cosf)
+
 //////////// x >= 2^24 - large arguments routine call ////////////
 LOCAL_LIBM_ENTRY(__libm_callout_sincosf)
 _SINCOSF_LARGE_ARGS:
--- a/sysdeps/ia64/fpu/s_cosl.S
+++ b/sysdeps/ia64/fpu/s_cosl.S
@ -1,7 +1,7 @@
 .file "sincosl.s"


-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
 // All rights reserved.
 //
 // Contributed 2000 by the Intel Numerics Group, Intel Corporation
@ -47,6 +47,8 @@
 // 05/13/02 Changed interface to __libm_pi_by_2_reduce
 // 02/10/03 Reordered header: .section, .global, .proc, .align;
 //          used data8 for long double table values
+// 10/13/03 Corrected final .endp name to match .proc
+// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader
 //
 //*********************************************************************
 //
@ -63,8 +65,7 @@
 //                              f32-f99
 //
 //    General Purpose Registers:
-//      r32-r43
-//      r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
+//      r32-r58
 //
 //    Predicate Registers:      p6-p13
 //
@ -715,20 +716,6 @@ FR_PP_1_lo        = f98
 FR_ArgPrime       = f99
 FR_inexact        = f100

-GR_sig_inv_pi  = r14
-GR_rshf_2to64  = r15
-GR_exp_2tom64  = r16
-GR_rshf        = r17
-GR_ad_p        = r18
-GR_ad_d        = r19
-GR_ad_pp       = r20
-GR_ad_qq       = r21
-GR_ad_c        = r22
-GR_ad_s        = r23
-GR_ad_ce       = r24
-GR_ad_se       = r25
-GR_ad_m14      = r26
-GR_ad_s1       = r27
 GR_exp_m2_to_m3= r36
 GR_N_Inc       = r37
 GR_Sin_or_Cos  = r38
@ -739,6 +726,21 @@ GR_exp_2_to_63 = r42
 GR_exp_2_to_m3 = r43
 GR_exp_2_to_24 = r44

+GR_sig_inv_pi  = r45
+GR_rshf_2to64  = r46
+GR_exp_2tom64  = r47
+GR_rshf        = r48
+GR_ad_p        = r49
+GR_ad_d        = r50
+GR_ad_pp       = r51
+GR_ad_qq       = r52
+GR_ad_c        = r53
+GR_ad_s        = r54
+GR_ad_ce       = r55
+GR_ad_se       = r56
+GR_ad_m14      = r57
+GR_ad_s1       = r58
+
 // Added for unwind support

 GR_SAVE_B0     = r39
@ -750,7 +752,7 @@ GR_SAVE_PFS    = r41

 GLOBAL_IEEE754_ENTRY(sinl)
 { .mlx
-      alloc r32 = ar.pfs,0,12,2,0
+      alloc r32 = ar.pfs,0,27,2,0
      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
 }
 { .mlx
@ -772,9 +774,10 @@ GLOBAL_IEEE754_ENTRY(sinl)
 ;;

 GLOBAL_IEEE754_END(sinl)
+
 GLOBAL_IEEE754_ENTRY(cosl)
 { .mlx
-      alloc r32 = ar.pfs,0,12,2,0
+      alloc r32 = ar.pfs,0,27,2,0
      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
 }
 { .mlx
@ -2285,6 +2288,7 @@ SINCOSL_SPECIAL:
 }

 GLOBAL_IEEE754_END(cosl)
+
 // *******************************************************************
 // *******************************************************************
 // *******************************************************************
@ -2299,7 +2303,7 @@ GLOBAL_IEEE754_END(cosl)
 //         c is in f9
 //         N is in r8
 //     Be sure to allocate at least 2 GP registers as output registers for
-//     __libm_pi_by_2_reduce.  This routine uses r49-50. These are used as
+//     __libm_pi_by_2_reduce.  This routine uses r59-60. These are used as
 //     scratch registers within the __libm_pi_by_2_reduce routine (for speed).
 //
 //     We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127.  We
@ -2356,6 +2360,6 @@ SINCOSL_ARG_TOO_LARGE:
        br.cond.sptk SINCOSL_NORMAL_R    // Branch if |r|>=2^-3 for |x| >= 2^63
 };;

-.endp
+LOCAL_LIBM_END(__libm_callout)
 .type   __libm_pi_by_2_reduce#,@function
 .global __libm_pi_by_2_reduce#
--- a/sysdeps/ia64/fpu/s_erf.S
+++ b/sysdeps/ia64/fpu/s_erf.S
@ -922,3 +922,4 @@ erf_denormal:

 GLOBAL_LIBM_END(erf)

+
--- a/sysdeps/ia64/fpu/s_erfc.S
+++ b/sysdeps/ia64/fpu/s_erfc.S
@ -1135,6 +1135,7 @@ GLOBAL_LIBM_ENTRY(erfc)
 };;

 GLOBAL_LIBM_END(erfc)
+
 // call via (p15) br.cond.spnt   __libm_error_region
 //          for  x > ARG_ASYMP = 28.0
 // or
--- a/Show More
+++ b/Show More