1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-30 22:43:12 +03:00

Optimize mempcpy on sparc.

* sysdeps/sparc/sparc32/memcpy.S: Implement mempcpy using a stub
	that branches into memcpy.
	* sysdeps/sparc/sparc64/memcpy.S: Likewise.
	* sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: Likewise.
	* sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise.
	* sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: Likewise.
	* sysdeps/sparc/sparc64/multiarch/memcpy.S: Add mempcpy multiarch
	bits.
	* sysdeps/sparc/sparc64/rtld-memcpy.c: Include generic mempcpy
	implementation too.
	* sysdeps/sparc/mempcpy.S: New file.
This commit is contained in:
David S. Miller
2012-03-28 22:35:26 -07:00
parent e5aa83e16d
commit 88d85d4f00
9 changed files with 124 additions and 13 deletions

View File

@ -1,5 +1,17 @@
2012-03-28 David S. Miller <davem@davemloft.net> 2012-03-28 David S. Miller <davem@davemloft.net>
* sysdeps/sparc/sparc32/memcpy.S: Implement mempcpy using a stub
that branches into memcpy.
* sysdeps/sparc/sparc64/memcpy.S: Likewise.
* sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: Likewise.
* sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise.
* sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: Likewise.
* sysdeps/sparc/sparc64/multiarch/memcpy.S: Add mempcpy multiarch
bits.
* sysdeps/sparc/sparc64/rtld-memcpy.c: Include generic mempcpy
implementation too.
* sysdeps/sparc/mempcpy.S: New file.
* sysdeps/sparc/sparc64/multiarch/memcpy.S: Provide a hidden def to * sysdeps/sparc/sparc64/multiarch/memcpy.S: Provide a hidden def to
the IFUNC routine in the libc case. the IFUNC routine in the libc case.
* sysdeps/sparc/sparc64/multiarch/memcpy.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memcpy.S: Likewise.

1
sysdeps/sparc/mempcpy.S Normal file
View File

@ -0,0 +1 @@
/* mempcpy is in memcpy.S */

View File

@ -104,11 +104,17 @@
std %t2, [%dst + offset + offset2 + 0x08]; std %t2, [%dst + offset + offset2 + 0x08];
.text .text
/* __mempcpy (sparc32): entry stub that shares memcpy's body.
   It computes %o0 + %o2 (dst + len, per the register note on
   ENTRY(memcpy)) and stores it at [%sp + 64], the same slot into
   which memcpy's own entry stores %o0 just before local label 101.
   Entering at 101 therefore skips memcpy's store and keeps this one.
   NOTE(review): the slot is presumably reloaded as the return value
   on the exit paths, which are outside this view -- confirm.  */
.align 4 ENTRY(__mempcpy)
add %o0, %o2, %g1
ba 101f
/* Branch delay slot: executed before control reaches 101.  */
st %g1, [%sp + 64]
END(__mempcpy)
.align 4
ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */
sub %o0, %o1, %o4
st %o0, [%sp + 64] st %o0, [%sp + 64]
101:
sub %o0, %o1, %o4
9: andcc %o4, 3, %o5 9: andcc %o4, 3, %o5
0: bne 86f 0: bne 86f
cmp %o2, 15 cmp %o2, 15
@ -641,3 +647,7 @@ ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */
END(memcpy) END(memcpy)
libc_hidden_builtin_def (memcpy) libc_hidden_builtin_def (memcpy)
libc_hidden_def (__mempcpy)
weak_alias (__mempcpy, mempcpy)
libc_hidden_builtin_def (mempcpy)

View File

@ -374,19 +374,24 @@ ENTRY(__memcpy_large)
mov %g4, %o0 mov %g4, %o0
END(__memcpy_large) END(__memcpy_large)
/* __mempcpy (sparc64): entry stub that shares memcpy's body.
   The branch targets local label 210, which sits immediately after
   memcpy's own `mov %o0, %g4', so that move is skipped on this path
   and %g4 keeps the value set here.
   NOTE(review): %g4 is presumably what the exit paths copy back to
   %o0 (as `mov %g4, %o0' does at the end of __memcpy_large) --
   confirm for the paths not visible here.  */
ENTRY(__mempcpy)
ba,pt %xcc, 210f
/* Branch delay slot: %g4 = %o0 + %o2 (dst + len).  */
add %o0, %o2, %g4
END(__mempcpy)
.align 32 .align 32
ENTRY(memcpy) ENTRY(memcpy)
mov %o0, %g4 /* IEU0 Group */
210: 210:
#ifndef USE_BPR #ifndef USE_BPR
srl %o2, 0, %o2 /* IEU1 Group */ srl %o2, 0, %o2 /* IEU1 */
#endif #endif
brz,pn %o2, 209b /* CTI Group */ brz,pn %o2, 209b /* CTI Group */
mov %o0, %g4 /* IEU0 */ 218: cmp %o2, 15 /* IEU1 */
218: cmp %o2, 15 /* IEU1 Group */ bleu,pn %xcc, 208b /* CTI Group */
bleu,pn %xcc, 208b /* CTI */ cmp %o2, (64 * 6) /* IEU1 */
cmp %o2, (64 * 6) /* IEU1 Group */ bgeu,pn %xcc, 200b /* CTI Group */
bgeu,pn %xcc, 200b /* CTI */ andcc %o0, 7, %g2 /* IEU1 */
andcc %o0, 7, %g2 /* IEU1 Group */
sub %o0, %o1, %g5 /* IEU0 */ sub %o0, %o1, %g5 /* IEU0 */
andcc %g5, 3, %o5 /* IEU1 Group */ andcc %g5, 3, %o5 /* IEU1 Group */
bne,pn %xcc, 212f /* CTI */ bne,pn %xcc, 212f /* CTI */
@ -569,3 +574,7 @@ ENTRY(memcpy)
END(memcpy) END(memcpy)
libc_hidden_builtin_def (memcpy) libc_hidden_builtin_def (memcpy)
libc_hidden_def (__mempcpy)
weak_alias (__mempcpy, mempcpy)
libc_hidden_builtin_def (mempcpy)

View File

@ -43,13 +43,19 @@
.text .text
/* __mempcpy_niagara1: entry stub that shares __memcpy_niagara1's body.
   The branch targets local label 101, placed right after
   __memcpy_niagara1's `mov %o0, %g5', so that move is skipped and
   %g5 keeps the value set here.
   NOTE(review): %g5 is presumably the register the exit paths return
   from; those paths are outside this view -- confirm.  */
ENTRY(__mempcpy_niagara1)
ba,pt %XCC, 101f
/* Branch delay slot: %g5 = %o0 + %o2 (dst + len).  */
add %o0, %o2, %g5
END(__mempcpy_niagara1)
.align 32 .align 32
ENTRY(__memcpy_niagara1) ENTRY(__memcpy_niagara1)
100: /* %o0=dst, %o1=src, %o2=len */
mov %o0, %g5
101:
# ifndef USE_BPR # ifndef USE_BPR
srl %o2, 0, %o2 srl %o2, 0, %o2
# endif # endif
100: /* %o0=dst, %o1=src, %o2=len */
mov %o0, %g5
cmp %o2, 0 cmp %o2, 0
be,pn %XCC, 85f be,pn %XCC, 85f
218: or %o0, %o1, %o3 218: or %o0, %o1, %o3

View File

@ -145,13 +145,19 @@
.text .text
/* __mempcpy_niagara2: entry stub that shares __memcpy_niagara2's body.
   The branch targets local label 101, placed right after
   __memcpy_niagara2's `mov %o0, %g5', so that move is skipped and
   %g5 keeps the value set here.
   NOTE(review): %g5 is presumably the register the exit paths return
   from; those paths are outside this view -- confirm.  */
ENTRY(__mempcpy_niagara2)
ba,pt %XCC, 101f
/* Branch delay slot: %g5 = %o0 + %o2 (dst + len).  */
add %o0, %o2, %g5
END(__mempcpy_niagara2)
.align 32 .align 32
ENTRY(__memcpy_niagara2) ENTRY(__memcpy_niagara2)
100: /* %o0=dst, %o1=src, %o2=len */
mov %o0, %g5
101:
# ifndef USE_BPR # ifndef USE_BPR
srl %o2, 0, %o2 srl %o2, 0, %o2
# endif # endif
100: /* %o0=dst, %o1=src, %o2=len */
mov %o0, %g5
cmp %o2, 0 cmp %o2, 0
be,pn %XCC, 85f be,pn %XCC, 85f
218: or %o0, %o1, %o3 218: or %o0, %o1, %o3

View File

@ -38,6 +38,11 @@
.text .text
/* __mempcpy_ultra3: entry stub that shares __memcpy_ultra3's body.
   The branch targets local label 101, placed right after
   __memcpy_ultra3's `mov %o0, %g5', so that move is skipped and
   %g5 keeps the value set here.
   NOTE(review): %g5 is presumably the register the exit paths return
   from; those paths are outside this view -- confirm.  */
ENTRY(__mempcpy_ultra3)
ba,pt %XCC, 101f
/* Branch delay slot: %g5 = %o0 + %o2 (dst + len).  */
add %o0, %o2, %g5
END(__mempcpy_ultra3)
/* Special/non-trivial issues of this code: /* Special/non-trivial issues of this code:
* *
* 1) %o5 is preserved from VISEntryHalf to VISExitHalf * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
@ -57,6 +62,7 @@ ENTRY(__memcpy_ultra3)
100: /* %o0=dst, %o1=src, %o2=len */ 100: /* %o0=dst, %o1=src, %o2=len */
mov %o0, %g5 mov %o0, %g5
101:
cmp %o2, 0 cmp %o2, 0
be,pn %XCC, out be,pn %XCC, out
218: or %o0, %o1, %o3 218: or %o0, %o1, %o3

View File

@ -72,12 +72,72 @@ ENTRY(memcpy)
mov %o1, %o0 mov %o1, %o0
END(memcpy) END(memcpy)
/* IFUNC resolver for __mempcpy (symbol type @gnu_indirect_function).
   In:  %o0 = capability bits, tested against the HWCAP_SPARC_* masks.
   Out: %o0 = address of the selected implementation.
   Selection order, first match wins:
     HWCAP_SPARC_N2      -> __mempcpy_niagara2
     HWCAP_SPARC_BLKINIT -> __mempcpy_niagara1
     HWCAP_SPARC_ULTRA3  -> __mempcpy_ultra3
     otherwise           -> __mempcpy_ultra1
   Each `be' carries the NEXT capability test in its delay slot, so
   the condition codes are already set when the branch target runs;
   do not reorder these instructions.
   Writes %o1 and the integer condition codes.  */
ENTRY(__mempcpy)
.type __mempcpy, @gnu_indirect_function
# ifdef SHARED
/* NOTE(review): presumably loads the PIC base into %o3 using %o5 as
   scratch; the macro is defined outside this view -- confirm.  */
SETUP_PIC_REG_LEAF(o3, o5)
# endif
andcc %o0, HWCAP_SPARC_N2, %g0
/* N2 bit clear: go test the result of the BLKINIT check below.  */
be 1f
/* Delay slot: BLKINIT test, consumed by the `be' at the next 1:.  */
andcc %o0, HWCAP_SPARC_BLKINIT, %g0
# ifdef SHARED
/* Build in %o1 the value that the `add %o3, %o1, %o1' at label 10
   turns into the final address.  */
sethi %gdop_hix22(__mempcpy_niagara2), %o1
xor %o1, %gdop_lox10(__mempcpy_niagara2), %o1
# else
set __mempcpy_niagara2, %o1
# endif
ba 10f
nop
/* BLKINIT bit clear: go test the result of the ULTRA3 check below.  */
1: be 1f
/* Delay slot: ULTRA3 test, consumed by the `be' at the next 1:.  */
andcc %o0, HWCAP_SPARC_ULTRA3, %g0
# ifdef SHARED
sethi %gdop_hix22(__mempcpy_niagara1), %o1
xor %o1, %gdop_lox10(__mempcpy_niagara1), %o1
# else
set __mempcpy_niagara1, %o1
# endif
ba 10f
nop
/* ULTRA3 bit clear: fall back to the ultra1 variant at 9:.  */
1: be 9f
nop
# ifdef SHARED
sethi %gdop_hix22(__mempcpy_ultra3), %o1
xor %o1, %gdop_lox10(__mempcpy_ultra3), %o1
# else
set __mempcpy_ultra3, %o1
# endif
ba 10f
nop
/* Default choice when none of the tested bits is set.  */
9:
# ifdef SHARED
sethi %gdop_hix22(__mempcpy_ultra1), %o1
xor %o1, %gdop_lox10(__mempcpy_ultra1), %o1
# else
set __mempcpy_ultra1, %o1
# endif
/* Common exit: %o1 holds the chosen symbol's value.  */
10:
# ifdef SHARED
add %o3, %o1, %o1
# endif
retl
/* Delay slot of retl: hand the result back in %o0.  */
mov %o1, %o0
END(__mempcpy)
libc_hidden_builtin_def (memcpy) libc_hidden_builtin_def (memcpy)
libc_hidden_def (__mempcpy)
weak_alias (__mempcpy, mempcpy)
libc_hidden_builtin_def (mempcpy)
#undef libc_hidden_builtin_def #undef libc_hidden_builtin_def
#define libc_hidden_builtin_def(name) #define libc_hidden_builtin_def(name)
#undef weak_alias
#define weak_alias(x, y)
#undef libc_hidden_def
#define libc_hidden_def(name)
#define memcpy __memcpy_ultra1 #define memcpy __memcpy_ultra1
#define __mempcpy __mempcpy_ultra1
#endif #endif

View File

@ -1 +1,2 @@
#include <string/memcpy.c> #include <string/memcpy.c>
#include <string/mempcpy.c>