mirror of
				https://sourceware.org/git/glibc.git
				synced 2025-10-24 13:33:08 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			238 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			238 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| ! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
 | |
| ! sum in a third limb vector.
 | |
| !
 | |
| ! Copyright (C) 1995-2014 Free Software Foundation, Inc.
 | |
| !
 | |
| ! This file is part of the GNU MP Library.
 | |
| !
 | |
| ! The GNU MP Library is free software; you can redistribute it and/or modify
 | |
| ! it under the terms of the GNU Lesser General Public License as published by
 | |
| ! the Free Software Foundation; either version 2.1 of the License, or (at your
 | |
| ! option) any later version.
 | |
| !
 | |
| ! The GNU MP Library is distributed in the hope that it will be useful, but
 | |
| ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 | |
| ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 | |
| ! License for more details.
 | |
| !
 | |
| ! You should have received a copy of the GNU Lesser General Public License
 | |
| ! along with the GNU MP Library; see the file COPYING.LIB.  If not,
 | |
| ! see <http://www.gnu.org/licenses/>.
 | |
| 
 | |
| 
 | |
/* Register aliases for the four arguments of __mpn_add_n.  Limbs are
   accessed as 32-bit words (every single-limb step below moves a pointer
   by 4 bytes).  Comments on the #define lines themselves are C comments on
   purpose: they are removed by the preprocessor, whereas a `!' comment
   would be pasted into every instruction that uses the alias.  */
| ! INPUT PARAMETERS
 | |
| #define RES_PTR	%o0	/* destination limb vector; %o0 also receives the returned carry */
 | |
| #define S1_PTR	%o1	/* first source limb vector */
 | |
| #define S2_PTR	%o2	/* second source limb vector */
 | |
| #define SIZE	%o3	/* number of limbs to add (header comment: must be > 0) */
 | |
| 
 | |
| #include <sysdep.h>
 | |
| 
 | |
/* __mpn_add_n -- add the SIZE-limb vectors at S1_PTR and S2_PTR, store the
   SIZE-limb sum at RES_PTR and return the carry out of the most
   significant limb (0 or 1) in %o0.

   Registers: %o0-%o3 hold the arguments (see the aliases above);
   %g1-%g4, %o4 and %o5 are used as scratch.  The routine returns with
   retl and the visible code contains no save/restore.

   Strategy: ldd/std move two limbs at a time but need an address that is
   a multiple of 8, so the code looks at bit 2 (value 4) of the pointers
   and picks one of three variants:
     V1a  RES_PTR and S2_PTR agree in bit 2: ldd from S2_PTR, std to
          RES_PTR, single-word ld from S1_PTR.
     V1b  RES_PTR and S1_PTR agree in bit 2: swap S1_PTR and S2_PTR and
          run the V1a code.
     V2   otherwise S1_PTR and S2_PTR agree: ldd from both sources,
          single-word st to RES_PTR.
   In V1 and V2 one limb is added up front when needed so that the
   ldd/std pointers become 8-byte aligned.

   Carry handling: the carry chain lives in the icc carry bit (cy) and is
   propagated by addxcc.  Counter updates (addcc/cmp/andcc) overwrite cy,
   so it is first saved as 0/1 with `addx %g0,%g0,%o4' and afterwards
   restored with `subcc %g0,%o4,%g0' (0 - %o4 borrows exactly when
   %o4 = 1), normally in the delay slot of the branch that follows.  */
| ENTRY(__mpn_add_n)
 | |
| 	xor	S2_PTR,RES_PTR,%g1	/* %g1 = S2_PTR ^ RES_PTR */
 | |
| 	andcc	%g1,4,%g0	/* bit 2 set <=> the two pointers differ modulo 8 */
 | |
| 	bne	LOC(1)			! branch if alignment differs
 | |
| 	 nop	/* (delay slot) */
 | |
| ! **  V1a  **
 | |
| LOC(0):	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
 | |
| 	be	LOC(v1)			! if no, branch
 | |
| 	 nop	/* (delay slot) */
 | |
| /* Add least significant limb separately to align RES_PTR and S2_PTR */
 | |
| 	ld	[S1_PTR],%g4	/* %g4 = first S1 limb */
 | |
| 	add	S1_PTR,4,S1_PTR
 | |
| 	ld	[S2_PTR],%g2	/* %g2 = first S2 limb */
 | |
| 	add	S2_PTR,4,S2_PTR
 | |
| 	add	SIZE,-1,SIZE	/* one limb consumed; plain add leaves cy alone */
 | |
| 	addcc	%g4,%g2,%o4	/* first limb sum; starts the carry chain */
 | |
| 	st	%o4,[RES_PTR]
 | |
| 	add	RES_PTR,4,RES_PTR
 | |
| LOC(v1):
 | |
| 	addx	%g0,%g0,%o4		! save cy in register
 | |
| 	cmp	SIZE,2			! if SIZE < 2 ...
 | |
| 	bl	LOC(end2)		! ... branch to tail code
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| 
 | |
| 	ld	[S1_PTR+0],%g4	/* preload S1 limb 0 */
 | |
| 	addcc	SIZE,-10,SIZE	/* bias: a loop1 pass adds 8 limbs and preloads 2 more (clobbers cy) */
 | |
| 	ld	[S1_PTR+4],%g1	/* preload S1 limb 1 */
 | |
| 	ldd	[S2_PTR+0],%g2	/* %g2,%g3 = S2 limbs 0-1 */
 | |
| 	blt	LOC(fin1)	/* fewer than 10 limbs left: skip the 8-limb loop */
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| /* Add blocks of 8 limbs until less than 8 limbs remain */
 | |
| LOC(loop1):
 | |
| 	addxcc	%g4,%g2,%o4	/* sum of limb 0 (operands were preloaded) */
 | |
| 	ld	[S1_PTR+8],%g4	/* loads for the next pair are interleaved with the adds */
 | |
| 	addxcc	%g1,%g3,%o5	/* sum of limb 1 */
 | |
| 	ld	[S1_PTR+12],%g1
 | |
| 	ldd	[S2_PTR+8],%g2
 | |
| 	std	%o4,[RES_PTR+0]	/* store limbs 0-1 from %o4,%o5 */
 | |
| 	addxcc	%g4,%g2,%o4	/* limbs 2-3 */
 | |
| 	ld	[S1_PTR+16],%g4
 | |
| 	addxcc	%g1,%g3,%o5
 | |
| 	ld	[S1_PTR+20],%g1
 | |
| 	ldd	[S2_PTR+16],%g2
 | |
| 	std	%o4,[RES_PTR+8]
 | |
| 	addxcc	%g4,%g2,%o4	/* limbs 4-5 */
 | |
| 	ld	[S1_PTR+24],%g4
 | |
| 	addxcc	%g1,%g3,%o5
 | |
| 	ld	[S1_PTR+28],%g1
 | |
| 	ldd	[S2_PTR+24],%g2
 | |
| 	std	%o4,[RES_PTR+16]
 | |
| 	addxcc	%g4,%g2,%o4	/* limbs 6-7 */
 | |
| 	ld	[S1_PTR+32],%g4	/* preload the pair consumed by the next pass or by the tail code */
 | |
| 	addxcc	%g1,%g3,%o5
 | |
| 	ld	[S1_PTR+36],%g1
 | |
| 	ldd	[S2_PTR+32],%g2
 | |
| 	std	%o4,[RES_PTR+24]	/* must precede the addx below, which reuses %o4 */
 | |
| 	addx	%g0,%g0,%o4		! save cy in register
 | |
| 	addcc	SIZE,-8,SIZE	/* 8 limbs done (clobbers cy) */
 | |
| 	add	S1_PTR,32,S1_PTR	/* advance all three pointers by 8 limbs */
 | |
| 	add	S2_PTR,32,S2_PTR
 | |
| 	add	RES_PTR,32,RES_PTR
 | |
| 	bge	LOC(loop1)	/* another full pass is possible */
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| 
 | |
| LOC(fin1):
 | |
| 	addcc	SIZE,8-2,SIZE	/* re-bias: SIZE >= 0 iff a 2-limb step (add 2, preload 2) fits; clobbers cy */
 | |
| 	blt	LOC(end1)	/* no: only the preloaded pair (plus maybe one limb) is left */
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| /* Add blocks of 2 limbs until less than 2 limbs remain */
 | |
| LOC(loope1):
 | |
| 	addxcc	%g4,%g2,%o4	/* add the preloaded pair ... */
 | |
| 	ld	[S1_PTR+8],%g4	/* ... while preloading the following one */
 | |
| 	addxcc	%g1,%g3,%o5
 | |
| 	ld	[S1_PTR+12],%g1
 | |
| 	ldd	[S2_PTR+8],%g2
 | |
| 	std	%o4,[RES_PTR+0]
 | |
| 	addx	%g0,%g0,%o4		! save cy in register
 | |
| 	addcc	SIZE,-2,SIZE	/* 2 limbs done (clobbers cy) */
 | |
| 	add	S1_PTR,8,S1_PTR
 | |
| 	add	S2_PTR,8,S2_PTR
 | |
| 	add	RES_PTR,8,RES_PTR
 | |
| 	bge	LOC(loope1)
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| LOC(end1):
 | |
| 	addxcc	%g4,%g2,%o4	/* add the last preloaded pair; nothing further is loaded */
 | |
| 	addxcc	%g1,%g3,%o5
 | |
| 	std	%o4,[RES_PTR+0]
 | |
| 	addx	%g0,%g0,%o4		! save cy in register
 | |
| 
 | |
| 	andcc	SIZE,1,%g0	/* low bit of the biased counter set <=> one limb remains (clobbers cy) */
 | |
| 	be	LOC(ret1)	/* nothing left: return */
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| /* Add last limb */
 | |
| 	ld	[S1_PTR+8],%g4	/* offset 8: the pointers were not advanced past the pair added at end1 */
 | |
| 	ld	[S2_PTR+8],%g2
 | |
| 	addxcc	%g4,%g2,%o4
 | |
| 	st	%o4,[RES_PTR+8]
 | |
| 
 | |
| LOC(ret1):
 | |
| 	retl	/* leaf return; the delay slot below produces the result */
 | |
| 	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
 | |
| 
 | |
| LOC(1):	xor	S1_PTR,RES_PTR,%g1	/* %g1 = S1_PTR ^ RES_PTR */
 | |
| 	andcc	%g1,4,%g0	/* do S1_PTR and RES_PTR differ modulo 8 as well? */
 | |
| 	bne	LOC(2)	/* yes: neither source matches RES_PTR, use V2 */
 | |
| 	nop	/* (delay slot) */
 | |
| ! **  V1b  **
 | |
| 	mov	S2_PTR,%g1	/* swap S1_PTR and S2_PTR through %g1 so that the V1a code applies */
 | |
| 	mov	S1_PTR,S2_PTR
 | |
| 	b	LOC(0)
 | |
| 	mov	%g1,S1_PTR	/* (delay slot) completes the swap */
 | |
| 
 | |
| ! **  V2  **
 | |
| /* If we come here, the alignment of S1_PTR and RES_PTR as well as the
 | |
|    alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
 | |
|    things can be aligned (that we care about) we now know that the alignment
 | |
|    of S1_PTR and S2_PTR are the same.  */
 | |
| 
 | |
| LOC(2):	cmp	SIZE,1	/* single limb?  When equal, the compare also leaves cy=0 for LOC(jone) */
 | |
| 	be	LOC(jone)
 | |
| 	nop	/* (delay slot) */
 | |
| 	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
 | |
| 	be	LOC(v2)			! if no, branch
 | |
| 	nop	/* (delay slot) */
 | |
| /* Add least significant limb separately to align S1_PTR and S2_PTR */
 | |
| 	ld	[S1_PTR],%g4
 | |
| 	add	S1_PTR,4,S1_PTR
 | |
| 	ld	[S2_PTR],%g2
 | |
| 	add	S2_PTR,4,S2_PTR
 | |
| 	add	SIZE,-1,SIZE	/* one limb consumed; plain add leaves cy alone */
 | |
| 	addcc	%g4,%g2,%o4	/* first limb sum; starts the carry chain */
 | |
| 	st	%o4,[RES_PTR]
 | |
| 	add	RES_PTR,4,RES_PTR
 | |
| 
 | |
| LOC(v2):
 | |
| 	addx	%g0,%g0,%o4		! save cy in register
 | |
| 	addcc	SIZE,-8,SIZE	/* bias: SIZE >= 0 iff a full 8-limb pass fits (clobbers cy) */
 | |
| 	blt	LOC(fin2)
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| /* Add blocks of 8 limbs until less than 8 limbs remain */
 | |
| LOC(loop2):
 | |
| 	ldd	[S1_PTR+0],%g2	/* %g2,%g3 = S1 limbs 0-1 */
 | |
| 	ldd	[S2_PTR+0],%o4	/* %o4,%o5 = S2 limbs 0-1 */
 | |
| 	addxcc	%g2,%o4,%g2
 | |
| 	st	%g2,[RES_PTR+0]	/* word stores: RES_PTR differs from S1_PTR in bit 2, so std cannot be used */
 | |
| 	addxcc	%g3,%o5,%g3
 | |
| 	st	%g3,[RES_PTR+4]
 | |
| 	ldd	[S1_PTR+8],%g2	/* limbs 2-3 */
 | |
| 	ldd	[S2_PTR+8],%o4
 | |
| 	addxcc	%g2,%o4,%g2
 | |
| 	st	%g2,[RES_PTR+8]
 | |
| 	addxcc	%g3,%o5,%g3
 | |
| 	st	%g3,[RES_PTR+12]
 | |
| 	ldd	[S1_PTR+16],%g2	/* limbs 4-5 */
 | |
| 	ldd	[S2_PTR+16],%o4
 | |
| 	addxcc	%g2,%o4,%g2
 | |
| 	st	%g2,[RES_PTR+16]
 | |
| 	addxcc	%g3,%o5,%g3
 | |
| 	st	%g3,[RES_PTR+20]
 | |
| 	ldd	[S1_PTR+24],%g2	/* limbs 6-7 */
 | |
| 	ldd	[S2_PTR+24],%o4
 | |
| 	addxcc	%g2,%o4,%g2
 | |
| 	st	%g2,[RES_PTR+24]
 | |
| 	addxcc	%g3,%o5,%g3
 | |
| 	st	%g3,[RES_PTR+28]
 | |
| 	addx	%g0,%g0,%o4		! save cy in register
 | |
| 	addcc	SIZE,-8,SIZE	/* 8 limbs done (clobbers cy) */
 | |
| 	add	S1_PTR,32,S1_PTR	/* advance all three pointers by 8 limbs */
 | |
| 	add	S2_PTR,32,S2_PTR
 | |
| 	add	RES_PTR,32,RES_PTR
 | |
| 	bge	LOC(loop2)
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| 
 | |
| LOC(fin2):
 | |
| 	addcc	SIZE,8-2,SIZE	/* re-bias: SIZE >= 0 iff at least 2 limbs remain (clobbers cy) */
 | |
| 	blt	LOC(end2)
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| LOC(loope2):
 | |
| 	ldd	[S1_PTR+0],%g2	/* add blocks of 2 limbs until fewer than 2 remain */
 | |
| 	ldd	[S2_PTR+0],%o4
 | |
| 	addxcc	%g2,%o4,%g2
 | |
| 	st	%g2,[RES_PTR+0]
 | |
| 	addxcc	%g3,%o5,%g3
 | |
| 	st	%g3,[RES_PTR+4]
 | |
| 	addx	%g0,%g0,%o4		! save cy in register
 | |
| 	addcc	SIZE,-2,SIZE	/* 2 limbs done (clobbers cy) */
 | |
| 	add	S1_PTR,8,S1_PTR
 | |
| 	add	S2_PTR,8,S2_PTR
 | |
| 	add	RES_PTR,8,RES_PTR
 | |
| 	bge	LOC(loope2)
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| LOC(end2):
 | |
| 	andcc	SIZE,1,%g0	/* one limb left?  Also entered from LOC(v1) when SIZE < 2 (clobbers cy) */
 | |
| 	be	LOC(ret2)	/* nothing left: return */
 | |
| 	subcc	%g0,%o4,%g0		! restore cy
 | |
| /* Add last limb */
 | |
| LOC(jone):
 | |
| 	ld	[S1_PTR],%g4	/* offset 0: here the pointers already address the remaining limb */
 | |
| 	ld	[S2_PTR],%g2
 | |
| 	addxcc	%g4,%g2,%o4
 | |
| 	st	%o4,[RES_PTR]
 | |
| 
 | |
| LOC(ret2):
 | |
| 	retl	/* leaf return; the delay slot below produces the result */
 | |
| 	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
 | |
| 
 | |
| END(__mpn_add_n)
 |