mirror of
				https://sourceware.org/git/glibc.git
				synced 2025-10-30 10:45:40 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			351 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			351 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /* From the Intel IA-64 Optimization Guide, choose the minimum latency
 | |
|    alternative.  */
 | |
| 
 | |
| #include <sysdep.h>
 | |
| #undef ret
 | |
| 
 | |
| #include <shlib-compat.h>
 | |
| 
 | |
| #if SHLIB_COMPAT(libc, GLIBC_2_2, GLIBC_2_2_6)
 | |
| 
 | |
| /* __divtf3
 | |
|    Compute an 80-bit IEEE double-extended quotient.
 | |
|    farg0 holds the dividend.  farg1 holds the divisor.
 | |
|    The quotient is returned in fret0.  frcpa writes a reciprocal
 | |
|    approximation to f10 and sets p6; every refinement step runs only
 | |
|    under p6.  p7 is made the complement of p6, and under p7 the f10
 | |
|    written by frcpa is returned unchanged.
 | |
|    NOTE(review): frcpa is expected to clear p6 and deliver the final
 | |
|    quotient in f10 for operands it handles itself; confirm against
 | |
|    the IA-64 architecture manual.
 | |
|    Writes f10-f14, p6 and p7 in addition to fret0.  The code is
 | |
|    exported as __divtf3@GLIBC_2.2 by the .symver directive at the end.
 | |
|    Notation in the comments below: a = farg0, b = farg1,
 | |
|    y = approximation of 1/b, q = approximation of a/b, e = 1 - b*y,
 | |
|    r = a - b*q.  f0 and f1 are the IA-64 constant registers 0.0 and
 | |
|    1.0.  */
 | |
| 
 | |
| ENTRY(___divtf3)
 | |
| 	cmp.eq p7, p0 = r0, r0	/* p7 = 1 (r0 always equals itself) */
 | |
| 	frcpa.s0 f10, p6 = farg0, farg1	/* y0 = f10; p6 gates the refinement */
 | |
| 	;;
 | |
| (p6)	cmp.ne p7, p0 = r0, r0	/* p6 set: clear p7, so p7 = !p6 */
 | |
| 	.pred.rel.mutex p6, p7
 | |
| (p6)	fnma.s1 f11 = farg1, f10, f1	/* e0 = 1 - b*y0 */
 | |
| (p6)	fma.s1 f12 = farg0, f10, f0	/* q0 = a*y0 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f13 = f11, f11, f0	/* f13 = e0^2 */
 | |
| (p6)	fma.s1 f14 = f11, f11, f11	/* f14 = e0 + e0^2 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f11 = f13, f13, f11	/* f11 = e0 + e0^4 */
 | |
| (p6)	fma.s1 f13 = f14, f10, f10	/* y1 = y0 + y0*(e0 + e0^2) */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f13, f11, f10	/* y2 = y0 + y1*(e0 + e0^4) */
 | |
| (p6)	fnma.s1 f11 = farg1, f12, farg0	/* r0 = a - b*q0 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f11 = f11, f10, f12	/* q1 = q0 + r0*y2 */
 | |
| (p6)	fnma.s1 f12 = farg1, f10, f1	/* e2 = 1 - b*y2 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f12, f10, f10	/* y3 = y2 + e2*y2 */
 | |
| (p6)	fnma.s1 f12 = farg1, f11, farg0	/* r1 = a - b*q1 */
 | |
| 	;;
 | |
| (p6)	fma.s0 fret0 = f12, f10, f11	/* result = q1 + r1*y3 */
 | |
| (p7)	mov fret0 = f10	/* p6 clear: return the f10 written by frcpa */
 | |
| 	br.ret.sptk rp
 | |
| END(___divtf3)
 | |
| 	.symver ___divtf3, __divtf3@GLIBC_2.2
 | |
| 
 | |
| /* __divdf3
 | |
|    Compute a 64-bit IEEE double quotient.
 | |
|    farg0 holds the dividend.  farg1 holds the divisor.
 | |
|    The quotient is returned in fret0.  frcpa writes a reciprocal
 | |
|    approximation to f10 and sets p6; every refinement step runs only
 | |
|    under p6.  p7 is made the complement of p6, and under p7 the f10
 | |
|    written by frcpa is returned unchanged.
 | |
|    NOTE(review): frcpa is expected to clear p6 and deliver the final
 | |
|    quotient in f10 for operands it handles itself; confirm against
 | |
|    the IA-64 architecture manual.
 | |
|    Writes f8, f10-f13, p6 and p7 in addition to fret0.
 | |
|    NOTE(review): f8 is presumably the same register as farg0/fret0
 | |
|    (those names come from sysdep.h, not visible here); it is written
 | |
|    only after the last read of farg0.  Confirm.
 | |
|    Exported as __divdf3@GLIBC_2.2 by the .symver directive at the end.
 | |
|    Notation in the comments below: a = farg0, b = farg1,
 | |
|    y = approximation of 1/b, q = approximation of a/b, e = 1 - b*y,
 | |
|    r = a - b*q.  f1 is the IA-64 constant register 1.0.  */
 | |
| 
 | |
| ENTRY(___divdf3)
 | |
| 	cmp.eq p7, p0 = r0, r0	/* p7 = 1 (r0 always equals itself) */
 | |
| 	frcpa.s0 f10, p6 = farg0, farg1	/* y0 = f10; p6 gates the refinement */
 | |
| 	;;
 | |
| (p6)	cmp.ne p7, p0 = r0, r0	/* p6 set: clear p7, so p7 = !p6 */
 | |
| 	.pred.rel.mutex p6, p7
 | |
| (p6)	fmpy.s1 f11 = farg0, f10	/* q0 = a*y0 */
 | |
| (p6)	fnma.s1 f12 = farg1, f10, f1	/* e0 = 1 - b*y0 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f11 = f12, f11, f11	/* q1 = q0 + e0*q0 */
 | |
| (p6)	fmpy.s1 f13 = f12, f12	/* f13 = e0^2 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f12, f10, f10	/* y1 = y0 + e0*y0 */
 | |
| (p6)	fma.s1 f11 = f13, f11, f11	/* q2 = q1 + e0^2*q1 */
 | |
| 	;;
 | |
| (p6)	fmpy.s1 f12 = f13, f13	/* f12 = e0^4 */
 | |
| (p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e0^2*y1 */
 | |
| 	;;
 | |
| (p6)	fma.d.s1 f11 = f12, f11, f11	/* q3 = q2 + e0^4*q2, in double precision (.d) */
 | |
| (p6)	fma.s1 f10 = f12, f10, f10	/* y3 = y2 + e0^4*y2 */
 | |
| 	;;
 | |
| (p6)	fnma.d.s1 f8 = farg1, f11, farg0	/* r = a - b*q3 */
 | |
| 	;;
 | |
| (p6)	fma.d fret0 = f8, f10, f11	/* result = q3 + r*y3, in double precision */
 | |
| (p7)	mov fret0 = f10	/* p6 clear: return the f10 written by frcpa */
 | |
| 	br.ret.sptk rp
 | |
| 	;;
 | |
| END(___divdf3)
 | |
| 	.symver	___divdf3, __divdf3@GLIBC_2.2
 | |
| 
 | |
| /* __divsf3
 | |
|    Compute a 32-bit IEEE float quotient.
 | |
|    farg0 holds the dividend.  farg1 holds the divisor.
 | |
|    The quotient is returned in fret0.  frcpa writes a reciprocal
 | |
|    approximation to f10 and sets p6; every refinement step runs only
 | |
|    under p6.  p7 is made the complement of p6, and under p7 the f10
 | |
|    written by frcpa is returned unchanged.
 | |
|    NOTE(review): frcpa is expected to clear p6 and deliver the final
 | |
|    quotient in f10 for operands it handles itself; confirm against
 | |
|    the IA-64 architecture manual.
 | |
|    Writes f8, f9, f10, p6 and p7 in addition to fret0.
 | |
|    NOTE(review): f8 and f9 are presumably the same registers as
 | |
|    farg0 and farg1 (those names come from sysdep.h, not visible
 | |
|    here); each is overwritten by the instruction that reads it for
 | |
|    the last time.  Confirm.
 | |
|    Exported as __divsf3@GLIBC_2.2 by the .symver directive at the end.
 | |
|    Notation in the comments below: a = farg0, b = farg1,
 | |
|    y0 = frcpa approximation of 1/b, q = approximation of a/b,
 | |
|    e0 = 1 - b*y0.  f1 is the IA-64 constant register 1.0.  */
 | |
| 
 | |
| ENTRY(___divsf3)
 | |
| 	cmp.eq p7, p0 = r0, r0	/* p7 = 1 (r0 always equals itself) */
 | |
| 	frcpa.s0 f10, p6 = farg0, farg1	/* y0 = f10; p6 gates the refinement */
 | |
| 	;;
 | |
| (p6)	cmp.ne p7, p0 = r0, r0	/* p6 set: clear p7, so p7 = !p6 */
 | |
| 	.pred.rel.mutex p6, p7
 | |
| (p6)	fmpy.s1 f8 = farg0, f10	/* q0 = a*y0 */
 | |
| (p6)	fnma.s1 f9 = farg1, f10, f1	/* e0 = 1 - b*y0 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f8 = f9, f8, f8	/* q1 = q0 + e0*q0 */
 | |
| (p6)	fmpy.s1 f9 = f9, f9	/* f9 = e0^2 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f8 = f9, f8, f8	/* q2 = q1 + e0^2*q1 */
 | |
| (p6)	fmpy.s1 f9 = f9, f9	/* f9 = e0^4 */
 | |
| 	;;
 | |
| (p6)	fma.d.s1 f10 = f9, f8, f8	/* q3 = q2 + e0^4*q2, in double precision (.d) */
 | |
| 	;;
 | |
| (p6)	fnorm.s.s0 fret0 = f10	/* result = q3 normalized to single precision (.s) */
 | |
| (p7)	mov fret0 = f10	/* p6 clear: return the f10 written by frcpa */
 | |
| 	br.ret.sptk rp
 | |
| 	;;
 | |
| END(___divsf3)
 | |
| 	.symver	___divsf3, __divsf3@GLIBC_2.2
 | |
| 
 | |
| /* __divdi3
 | |
|    Compute a 64-bit integer quotient.
 | |
|    in0 holds the dividend.  in1 holds the divisor.
 | |
|    The quotient is returned in ret0.  Both inputs are converted to
 | |
|    floating point as signed integers, the quotient is refined in
 | |
|    floating point, truncated back to a signed integer and moved to
 | |
|    ret0.  The refinement steps run only under p6 (set by frcpa); the
 | |
|    final fcvt/getf are unpredicated, so when p6 is clear they operate
 | |
|    on the f10 written by frcpa.
 | |
|    Writes f8-f13 and p6.  Exported as __divdi3@GLIBC_2.2 by the
 | |
|    .symver directive at the end.
 | |
|    Notation in the comments below: a = f8, b = f9 (after conversion),
 | |
|    y = approximation of 1/b, q = approximation of a/b,
 | |
|    e0 = 1 - b*y0, r = a - b*q.  f1 is the IA-64 constant register
 | |
|    1.0.  */
 | |
| 
 | |
| ENTRY(___divdi3)
 | |
| 	.regstk 2,0,0,0	/* 2 input registers; no locals, outputs or rotating registers */
 | |
| 	/* Transfer inputs to FP registers.  */
 | |
| 	setf.sig f8 = in0
 | |
| 	setf.sig f9 = in1
 | |
| 	;;
 | |
| 	/* Convert the inputs to FP, so that they won't be treated as
 | |
| 	   unsigned.  */
 | |
| 	fcvt.xf f8 = f8
 | |
| 	fcvt.xf f9 = f9
 | |
| 	;;
 | |
| 	/* Compute the reciprocal approximation.  */
 | |
| 	frcpa.s1 f10, p6 = f8, f9	/* y0 = f10; p6 gates the refinement */
 | |
| 	;;
 | |
| 	/* 3 Newton-Raphson iterations.  */
 | |
| (p6)	fnma.s1 f11 = f9, f10, f1	/* e0 = 1 - b*y0 */
 | |
| (p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y0 */
 | |
| 	;;
 | |
| (p6)	fmpy.s1 f13 = f11, f11	/* f13 = e0^2 */
 | |
| (p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e0*q0 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y0 + e0*y0 */
 | |
| (p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e0^2*q1 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e0^2*y1 */
 | |
| (p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
 | |
| 	;;
 | |
| 	/* Round quotient to an integer.  */
 | |
| 	fcvt.fx.trunc.s1 f10 = f10	/* truncating conversion to a signed integer */
 | |
| 	;;
 | |
| 	/* Transfer result to GP registers.  */
 | |
| 	getf.sig ret0 = f10
 | |
| 	br.ret.sptk rp
 | |
| 	;;
 | |
| END(___divdi3)
 | |
| 	.symver	___divdi3, __divdi3@GLIBC_2.2
 | |
| 
 | |
| /* __moddi3
 | |
|    Compute a 64-bit integer modulus.
 | |
|    in0 holds the dividend (a).  in1 holds the divisor (b).
 | |
|    The remainder is returned in ret0.  A truncated signed quotient q
 | |
|    is computed in floating point, then the remainder is formed with
 | |
|    one integer multiply-add as q * (-b) + a.  f14 keeps the original
 | |
|    integer a for that step; in1 is negated in place and reloaded
 | |
|    into f9 once the floating-point b is no longer needed.
 | |
|    The refinement steps run only under p6 (set by frcpa); the
 | |
|    remaining instructions are unpredicated, so when p6 is clear they
 | |
|    operate on the f10 written by frcpa.
 | |
|    Writes f8-f14, p6 and in1.  Exported as __moddi3@GLIBC_2.2 by the
 | |
|    .symver directive at the end.
 | |
|    Notation in the comments below: A = f8 and B = f9 are a and b
 | |
|    converted to floating point, y = approximation of 1/B,
 | |
|    q = approximation of A/B, e0 = 1 - B*y0.  f1 is the IA-64 constant
 | |
|    register 1.0.  */
 | |
| 
 | |
| ENTRY(___moddi3)
 | |
| 	.regstk 2,0,0,0	/* 2 input registers; no locals, outputs or rotating registers */
 | |
| 	/* Transfer inputs to FP registers.  */
 | |
| 	setf.sig f14 = in0	/* f14 keeps integer a for the final xma.l */
 | |
| 	setf.sig f9 = in1
 | |
| 	;;
 | |
| 	/* Convert the inputs to FP, so that they won't be treated as
 | |
| 	   unsigned.  */
 | |
| 	fcvt.xf f8 = f14
 | |
| 	fcvt.xf f9 = f9
 | |
| 	;;
 | |
| 	/* Compute the reciprocal approximation.  */
 | |
| 	frcpa.s1 f10, p6 = f8, f9	/* y0 = f10; p6 gates the refinement */
 | |
| 	;;
 | |
| 	/* 3 Newton-Raphson iterations.  */
 | |
| (p6)	fmpy.s1 f12 = f8, f10	/* q0 = A*y0 */
 | |
| (p6)	fnma.s1 f11 = f9, f10, f1	/* e0 = 1 - B*y0 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e0*q0 */
 | |
| (p6)	fmpy.s1 f13 = f11, f11	/* f13 = e0^2 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y0 + e0*y0 */
 | |
| (p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e0^2*q1 */
 | |
| 	;;
 | |
| 	sub in1 = r0, in1	/* in1 = -b (integer) */
 | |
| (p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e0^2*y1 */
 | |
| (p6)	fnma.s1 f12 = f9, f11, f8	/* A - B*q2; last read of the floating-point B in f9 */
 | |
| 	;;
 | |
| 	setf.sig f9 = in1	/* f9 = integer -b, replacing the floating-point B */
 | |
| (p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + (A - B*q2)*y2 */
 | |
| 	;;
 | |
| 	fcvt.fx.trunc.s1 f10 = f10	/* q = truncating conversion to a signed integer */
 | |
| 	;;
 | |
| 	/* r = q * (-b) + a  */
 | |
| 	xma.l f10 = f10, f9, f14
 | |
| 	;;
 | |
| 	/* Transfer result to GP registers.  */
 | |
| 	getf.sig ret0 = f10
 | |
| 	br.ret.sptk rp
 | |
| 	;;
 | |
| END(___moddi3)
 | |
| 	.symver ___moddi3, __moddi3@GLIBC_2.2
 | |
| 
 | |
| /* __udivdi3
 | |
|    Compute a 64-bit unsigned integer quotient.
 | |
|    in0 holds the dividend.  in1 holds the divisor.
 | |
|    The quotient is returned in ret0.  Both inputs are converted to
 | |
|    floating point as unsigned integers (fcvt.xuf), the quotient is
 | |
|    refined in floating point, truncated back to an unsigned integer
 | |
|    (fcvt.fxu.trunc) and moved to ret0.  The refinement steps run only
 | |
|    under p6 (set by frcpa); the final fcvt/getf are unpredicated, so
 | |
|    when p6 is clear they operate on the f10 written by frcpa.
 | |
|    Writes f8-f13 and p6.  Exported as __udivdi3@GLIBC_2.2 by the
 | |
|    .symver directive at the end.
 | |
|    Notation in the comments below: a = f8, b = f9 (after conversion),
 | |
|    y = approximation of 1/b, q = approximation of a/b,
 | |
|    e0 = 1 - b*y0, r = a - b*q.  f1 is the IA-64 constant register
 | |
|    1.0.  */
 | |
| 
 | |
| ENTRY(___udivdi3)
 | |
| 	.regstk 2,0,0,0	/* 2 input registers; no locals, outputs or rotating registers */
 | |
| 	/* Transfer inputs to FP registers.  */
 | |
| 	setf.sig f8 = in0
 | |
| 	setf.sig f9 = in1
 | |
| 	;;
 | |
| 	/* Convert the inputs to FP, to avoid FP software-assist faults.  */
 | |
| 	fcvt.xuf.s1 f8 = f8
 | |
| 	fcvt.xuf.s1 f9 = f9
 | |
| 	;;
 | |
| 	/* Compute the reciprocal approximation.  */
 | |
| 	frcpa.s1 f10, p6 = f8, f9	/* y0 = f10; p6 gates the refinement */
 | |
| 	;;
 | |
| 	/* 3 Newton-Raphson iterations.  */
 | |
| (p6)	fnma.s1 f11 = f9, f10, f1	/* e0 = 1 - b*y0 */
 | |
| (p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y0 */
 | |
| 	;;
 | |
| (p6)	fmpy.s1 f13 = f11, f11	/* f13 = e0^2 */
 | |
| (p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e0*q0 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y0 + e0*y0 */
 | |
| (p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e0^2*q1 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e0^2*y1 */
 | |
| (p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
 | |
| 	;;
 | |
| 	/* Round quotient to an unsigned integer.  */
 | |
| 	fcvt.fxu.trunc.s1 f10 = f10	/* truncating conversion */
 | |
| 	;;
 | |
| 	/* Transfer result to GP registers.  */
 | |
| 	getf.sig ret0 = f10
 | |
| 	br.ret.sptk rp
 | |
| 	;;
 | |
| END(___udivdi3)
 | |
| 	.symver	___udivdi3, __udivdi3@GLIBC_2.2
 | |
| 
 | |
| /* __umoddi3
 | |
|    Compute a 64-bit unsigned integer modulus.
 | |
|    in0 holds the dividend (a).  in1 holds the divisor (b).
 | |
|    The remainder is returned in ret0.  A truncated unsigned quotient
 | |
|    q is computed in floating point, then the remainder is formed with
 | |
|    one integer multiply-add as q * (-b) + a.  f14 keeps the original
 | |
|    integer a for that step; in1 is negated in place and reloaded
 | |
|    into f9 once the floating-point b is no longer needed.
 | |
|    The refinement steps run only under p6 (set by frcpa); the
 | |
|    remaining instructions are unpredicated, so when p6 is clear they
 | |
|    operate on the f10 written by frcpa.
 | |
|    Writes f8-f14, p6 and in1.  Exported as __umoddi3@GLIBC_2.2 by the
 | |
|    .symver directive at the end.
 | |
|    Notation in the comments below: A = f8 and B = f9 are a and b
 | |
|    converted to floating point, y = approximation of 1/B,
 | |
|    q = approximation of A/B, e0 = 1 - B*y0.  f1 is the IA-64 constant
 | |
|    register 1.0.  */
 | |
| 
 | |
| ENTRY(___umoddi3)
 | |
| 	.regstk 2,0,0,0	/* 2 input registers; no locals, outputs or rotating registers */
 | |
| 	/* Transfer inputs to FP registers.  */
 | |
| 	setf.sig f14 = in0	/* f14 keeps integer a for the final xma.l */
 | |
| 	setf.sig f9 = in1
 | |
| 	;;
 | |
| 	/* Convert the inputs to FP, to avoid FP software assist faults.  */
 | |
| 	fcvt.xuf.s1 f8 = f14
 | |
| 	fcvt.xuf.s1 f9 = f9
 | |
| 	;;
 | |
| 	/* Compute the reciprocal approximation.  */
 | |
| 	frcpa.s1 f10, p6 = f8, f9	/* y0 = f10; p6 gates the refinement */
 | |
| 	;;
 | |
| 	/* 3 Newton-Raphson iterations.  */
 | |
| (p6)	fmpy.s1 f12 = f8, f10	/* q0 = A*y0 */
 | |
| (p6)	fnma.s1 f11 = f9, f10, f1	/* e0 = 1 - B*y0 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e0*q0 */
 | |
| (p6)	fmpy.s1 f13 = f11, f11	/* f13 = e0^2 */
 | |
| 	;;
 | |
| (p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y0 + e0*y0 */
 | |
| (p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e0^2*q1 */
 | |
| 	;;
 | |
| 	sub in1 = r0, in1	/* in1 = -b (integer) */
 | |
| (p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e0^2*y1 */
 | |
| (p6)	fnma.s1 f12 = f9, f11, f8	/* A - B*q2; last read of the floating-point B in f9 */
 | |
| 	;;
 | |
| 	setf.sig f9 = in1	/* f9 = integer -b, replacing the floating-point B */
 | |
| (p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + (A - B*q2)*y2 */
 | |
| 	;;
 | |
| 	/* Round quotient to an unsigned integer.  */
 | |
| 	fcvt.fxu.trunc.s1 f10 = f10	/* q = truncating conversion */
 | |
| 	;;
 | |
| 	/* r = q * (-b) + a  */
 | |
| 	xma.l f10 = f10, f9, f14
 | |
| 	;;
 | |
| 	/* Transfer result to GP registers.  */
 | |
| 	getf.sig ret0 = f10
 | |
| 	br.ret.sptk rp
 | |
| 	;;
 | |
| END(___umoddi3)
 | |
| 	.symver	___umoddi3, __umoddi3@GLIBC_2.2
 | |
| 
 | |
| /* __multi3
 | |
|    Compute a 128-bit multiply of 128-bit multiplicands.
 | |
|    in0/in1 holds one multiplicand (a), in2/in3 holds the other one (b).
 | |
|    The low 64 bits of the product are returned in ret0 and the high
 | |
|    64 bits in ret1.  As used below, in0 and in2 are the low 64-bit
 | |
|    halves and in1 and in3 the high halves: in1 and in3 only feed
 | |
|    products that are added into ret1.
 | |
|    The full 128-bit product in0 * in2 is built from four 32x32-bit
 | |
|    partial products.  Notation in the comments below:
 | |
|    a0/a1 = low/high 32 bits of in0, b0/b1 = low/high 32 bits of in2,
 | |
|    mid = a0*b1 + a1*b0 + (a0*b0 >> 32), truncated to 64 bits.
 | |
|    Writes f6-f11, r14-r22, p6 and p7.  Exported as __multi3@GLIBC_2.2
 | |
|    by the .symver directive at the end.  */
 | |
| 
 | |
| ENTRY(___multi3)
 | |
| 	.regstk 4,0,0,0	/* 4 input registers; no locals, outputs or rotating registers */
 | |
| 	setf.sig f6 = in1
 | |
| 	movl r19 = 0xffffffff	/* r19 = mask for the low 32 bits */
 | |
| 	setf.sig f7 = in2
 | |
| 	;;
 | |
| 	and r14 = r19, in0	/* r14 = a0 */
 | |
| 	;;
 | |
| 	setf.sig f10 = r14	/* f10 = a0 */
 | |
| 	and r14 = r19, in2	/* r14 = b0 */
 | |
| 	xmpy.l f9 = f6, f7	/* f9 = low 64 bits of in1 * in2 */
 | |
| 	;;
 | |
| 	setf.sig f6 = r14	/* f6 = b0 */
 | |
| 	shr.u r14 = in0, 32	/* r14 = a1 */
 | |
| 	;;
 | |
| 	setf.sig f7 = r14	/* f7 = a1 */
 | |
| 	shr.u r14 = in2, 32	/* r14 = b1 */
 | |
| 	;;
 | |
| 	setf.sig f8 = r14	/* f8 = b1 */
 | |
| 	xmpy.l f11 = f10, f6	/* f11 = a0 * b0 */
 | |
| 	xmpy.l f6 = f7, f6	/* f6 = a1 * b0 */
 | |
| 	;;
 | |
| 	getf.sig r16 = f11	/* r16 = a0 * b0 */
 | |
| 	xmpy.l f7 = f7, f8	/* f7 = a1 * b1 */
 | |
| 	;;
 | |
| 	shr.u r14 = r16, 32	/* r14 = a0*b0 >> 32 */
 | |
| 	and r16 = r19, r16	/* r16 = low 32 bits of a0*b0 */
 | |
| 	getf.sig r17 = f6	/* r17 = a1 * b0 */
 | |
| 	setf.sig f6 = in0
 | |
| 	;;
 | |
| 	setf.sig f11 = r14	/* f11 = a0*b0 >> 32 */
 | |
| 	getf.sig r21 = f7	/* r21 = a1 * b1 */
 | |
| 	setf.sig f7 = in3
 | |
| 	;;
 | |
| 	xma.l f11 = f10, f8, f11	/* f11 = a0*b1 + (a0*b0 >> 32) */
 | |
| 	xma.l f6 = f6, f7, f9	/* f6 = low 64 bits of in0*in3 + in1*in2 */
 | |
| 	;;
 | |
| 	getf.sig r18 = f11
 | |
| 	;;
 | |
| 	add r18 = r18, r17	/* r18 = mid (may wrap past 64 bits) */
 | |
| 	;;
 | |
| 	and r15 = r19, r18	/* r15 = low 32 bits of mid */
 | |
| 	cmp.ltu p7, p6 = r18, r17	/* p7 = the add above wrapped (sum < addend) */
 | |
| 	;;
 | |
| 	getf.sig r22 = f6	/* r22 = low 64 bits of in0*in3 + in1*in2 */
 | |
| (p7)	adds r14 = 1, r19	/* r14 = 0x100000000: weight of the lost carry in the high word */
 | |
| 	;;
 | |
| (p7)	add r21 = r21, r14	/* fold the carry into a1*b1; reads r14 before the shr.u below rewrites it */
 | |
| 	shr.u r14 = r18, 32	/* r14 = mid >> 32 */
 | |
| 	shl r15 = r15, 32	/* r15 = low 32 bits of mid, moved to bits 32-63 */
 | |
| 	;;
 | |
| 	add r20 = r21, r14	/* r20 = a1*b1 (+ carry) + (mid >> 32) */
 | |
| 	;;
 | |
| 	add ret0 = r15, r16	/* low 64 bits of the product */
 | |
| 	add ret1 = r22, r20	/* high 64 bits of the product */
 | |
| 	br.ret.sptk rp
 | |
| 	;;
 | |
| END(___multi3)
 | |
| 	.symver	___multi3, __multi3@GLIBC_2.2
 | |
| 
 | |
| #endif
 |