/* Source: mirror of https://sourceware.org/git/glibc.git
   (synced 2025-11-03 20:53:13 +03:00); 238 lines, 5.5 KiB.
   The code viewer labelled this file "ArmAsm"; the content below is
   SPARC assembly.  */
! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
! sum in a third limb vector.
!
! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
!
! This file is part of the GNU MP Library.
!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <http://www.gnu.org/licenses/>.


! INPUT PARAMETERS
/* Symbolic names for the four argument registers.  The routine below
   advances all of them while it runs, and finally overwrites %o0 with
   the carry-out it returns.  */
#define RES_PTR	%o0	/* address of the result limb vector */
#define S1_PTR	%o1	/* address of the first addend limb vector */
#define S2_PTR	%o2	/* address of the second addend limb vector */
#define SIZE	%o3	/* number of limbs in each vector (> 0) */

#include <sysdep.h>

/* __mpn_add_n -- add two limb vectors of equal length, store the sum in a
   third vector and return the carry-out.

   In:   RES_PTR  address of the result vector
         S1_PTR   address of the first addend vector
         S2_PTR   address of the second addend vector
         SIZE     number of limbs in each vector (> 0)
   Out:  %o0      carry-out of the most significant limb (0 or 1)
   Uses: %g1-%g4, %o4, %o5 and the integer condition codes as scratch;
         all four argument registers are modified.  The routine returns
         with retl and never executes save/restore.

   A limb is one 32-bit word (every pointer step below is 4 bytes).

   Alignment strategy.  Only bit 2 of each pointer is examined, i.e.
   whether the pointer sits on a doubleword (8-byte) boundary; this
   assumes the pointers are at least word aligned.  ldd/std move two
   limbs at once but are only used on pointers that have been brought
   to a doubleword boundary (the SPARC architecture requires that):
     V1a  S2_PTR and RES_PTR agree in bit 2: ldd from S2_PTR, std to
          RES_PTR, S1_PTR is read with two single-word loads.
     V1b  S1_PTR and RES_PTR agree in bit 2: the two source pointers
          are swapped and V1a is used.
     V2   neither source agrees with RES_PTR, hence S1_PTR and S2_PTR
          agree with each other: ldd from both, single-word stores.

   Carry handling.  The carry chain lives in the condition-code carry
   bit between addxcc instructions.  Loop bookkeeping (addcc on SIZE,
   cmp, andcc) overwrites that bit, so it is parked in %o4 with
   "addx %g0,%g0,%o4" (0 + 0 + cy) and regenerated with
   "subcc %g0,%o4,%g0" (0 - %o4 borrows exactly when %o4 is 1).

   Delay slots.  The instruction following each branch or retl is
   executed as part of the branch; such instructions are indented by
   one extra space.  A branch has already tested the condition codes
   when its delay slot runs, which is why the carry can be restored
   there without disturbing the branch decision.

   SIZE is only ever biased by even amounts (-10, -8, +6, -2), so its
   bit 0 always tells whether an odd limb remains for the tail code.  */
ENTRY(__mpn_add_n)
	xor	S2_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0		! bit 2 set if the two pointers disagree
	bne	LOC(1)			! branch if alignment differs
	 nop
! **  V1a  **
LOC(0):	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1)			! if no, branch
	 nop
/* Add least significant limb separately to align RES_PTR and S2_PTR.
   The plain add instructions do not touch the condition codes, so the
   carry produced by addcc is still intact when it is saved at LOC(v1).  */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	addcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	 subcc	%g0,%o4,%g0		! restore cy

/* Prime the software pipeline with the first limb pair of each source.
   SIZE is biased by -10: 8 limbs for one round of the unrolled loop plus
   the 2 limbs that each round fetches ahead, so a non-negative result
   guarantees that the look-ahead loads stay inside the vectors.  */
	ld	[S1_PTR+0],%g4
	addcc	SIZE,-10,SIZE
	ld	[S1_PTR+4],%g1
	ldd	[S2_PTR+0],%g2		! %g2,%g3 = two limbs of the other source
	blt	LOC(fin1)
	 subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain.
   Each step adds the pair fetched earlier (%g4+%g2, %g1+%g3) while the
   next pair is being loaded; sums leave through %o4,%o5 with one std.  */
LOC(loop1):
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+20],%g1
	ldd	[S2_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+28],%g1
	ldd	[S2_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+32],%g4		! look-ahead pair for the next round
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+36],%g1
	ldd	[S2_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1)
	 subcc	%g0,%o4,%g0		! restore cy

/* Drop the 8-limb bias but keep the 2-limb look-ahead bias; from here on
   the counter is negative once fewer than 4 limbs are left unstored.  */
LOC(fin1):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end1)
	 subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1):
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1)
	 subcc	%g0,%o4,%g0		! restore cy
/* Drain the pipeline: add and store the pair that is already loaded.  */
LOC(end1):
	addxcc	%g4,%g2,%o4
	addxcc	%g1,%g3,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! one odd limb left?
	be	LOC(ret1)
	 subcc	%g0,%o4,%g0		! restore cy
/* Add last limb.  The pointers were not advanced past the pair stored
   just above, hence the +8 offsets.  */
	ld	[S1_PTR+8],%g4
	ld	[S2_PTR+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1):
	retl
	 addx	%g0,%g0,%o0	! return carry-out from most sign. limb

LOC(1):	xor	S1_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0		! bit 2 set if the two pointers disagree
	bne	LOC(2)
	 nop
! **  V1b  **
/* S1_PTR shares its alignment with RES_PTR.  Exchange the two source
   pointers (through %g1) and reuse the V1a code.  */
	mov	S2_PTR,%g1
	mov	S1_PTR,S2_PTR
	b	LOC(0)
	 mov	%g1,S1_PTR

! **  V2  **
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of S1_PTR and S2_PTR are the same.  */

/* A single limb goes straight to the one-limb tail; the cmp of two equal
   values leaves cy=0, which is the carry-in the tail needs.  */
LOC(2):	cmp	SIZE,1
	be	LOC(jone)
	 nop
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
	be	LOC(v2)			! if no, branch
	 nop
/* Add least significant limb separately to align S1_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	addcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR

LOC(v2):
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE		! bias by one round of 8 limbs
	blt	LOC(fin2)
	 subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain.
   Both sources are read two limbs at a time (%g2,%g3 and %o4,%o5); the
   sums are written with single-word stores.  No look-ahead loads are
   done in this variant.  */
LOC(loop2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	ldd	[S1_PTR+8],%g2
	ldd	[S2_PTR+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+12]
	ldd	[S1_PTR+16],%g2
	ldd	[S2_PTR+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+20]
	ldd	[S1_PTR+24],%g2
	ldd	[S2_PTR+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop2)
	 subcc	%g0,%o4,%g0		! restore cy

/* Swap the 8-limb bias for a 2-limb bias.  */
LOC(fin2):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end2)
	 subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
LOC(loope2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope2)
	 subcc	%g0,%o4,%g0		! restore cy
/* Shared tail: also reached from LOC(v1) when fewer than 2 limbs are left
   there.  On every path the saved carry is in %o4.  */
LOC(end2):
	andcc	SIZE,1,%g0		! one odd limb left?
	be	LOC(ret2)
	 subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
LOC(jone):
	ld	[S1_PTR],%g4
	ld	[S2_PTR],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]

LOC(ret2):
	retl
	 addx	%g0,%g0,%o0	! return carry-out from most sign. limb

END(__mpn_add_n)