mirror of
https://sourceware.org/git/glibc.git
synced 2025-08-07 06:43:00 +03:00
166 lines
3.5 KiB
ArmAsm
166 lines
3.5 KiB
ArmAsm
/* Optimized memcmp implementation for POWER10.
   Copyright (C) 2021-2025 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
|
|
|
|
#include <sysdep.h>
|
|
|
|
/* Compare 32 bytes at OFFSET(r3) against 32 bytes at OFFSET(r4).

   VR1 and VR2 name the first vector register of two register pairs.
   Each lxvp fills one pair (VR/VR+1) with 32 bytes; the "32+" converts
   a vector register number into the VSX register number that overlays
   it.

   The VR+1 halves are compared first, leaving the vcmpneb. mask in v5
   and branching to L(TAIL_2) on a mismatch; then the VR halves are
   compared, leaving the mask in v4 and branching to L(TAIL_1).
   NOTE(review): the VR+1-first order presumably reflects which half
   holds the lower addresses on the targeted endianness -- confirm
   against the lxvp description in the ISA.

   Falls through when all 32 bytes match.  Clobbers v4, v5, cr6 and the
   four registers of the two pairs.  r3 and r4 are not modified.

   The last line deliberately has no trailing backslash, so the macro
   does not depend on the line that follows it being empty.  */
#define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\
	lxvp	  32+vr1,offset(r3);	\
	lxvp	  32+vr2,offset(r4);	\
	vcmpneb.  v5,vr1+1,vr2+1;	\
	bne	  cr6,L(tail_2);	\
	vcmpneb.  v4,vr1,vr2;		\
	bne	  cr6,L(tail_1);
|
|
|
|
/* Return from the function with the byte difference selected by V_RES.

   V_RES is the vcmpneb. mask obtained by comparing S1 with S2.
   vctzlsbb counts the trailing byte elements of the mask whose least
   significant bit is clear, i.e. the matching bytes that precede the
   first flagged one; that count (r7) is then used as the right-indexed
   byte position for vextubrx on both S1 and S2.  The two extracted
   bytes are zero-extended, and r3 = byte of S1 - byte of S2.

   Because both extractions use the same index, the result is 0
   whenever S1 and S2 hold identical contents.

   Clobbers r7, r8 and r9, sets r3 and executes blr, so nothing placed
   after an expansion of this macro is reached by fall-through.

   The last line deliberately has no trailing backslash, so the macro
   does not depend on the line that follows it being empty.  */
#define TAIL(v_res,s1,s2)	\
	vctzlsbb  r7,v_res;	\
	vextubrx  r8,r7,s1;	\
	vextubrx  r9,r7,s2;	\
	subf	  r3,r9,r8;	\
	blr;
|
|
|
|
/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4],
		    size_t size [r5])

   Returns 0 in r3 when no differing byte is found in the first SIZE
   bytes.  Otherwise one of the L(tailN) exits returns, through the TAIL
   macro, the byte of s1 minus the byte of s2 at the mismatch.

   Register use:
     r3, r4	running pointers into s1 and s2
     r5		bytes still to be compared
     r6, r7	r3 + 16 and r4 + 16 inside L(last_compare)
     r8		iteration count in L(loop_head);
		16 << 56 inside L(last_compare)
     r9		remaining length << 56, the length operand of lxvl
     v4, v5	vcmpneb. masks consumed by TAIL
     v0-v3, v6-v17  data loaded from the two buffers
   r7-r9 are reused as scratch by TAIL.  cr0, cr6 and ctr are written.  */

#ifndef MEMCMP
# define MEMCMP memcmp
#endif
	/* lxvp (used by COMPARE_32) requires the power10 instruction set.  */
	.machine  power10
ENTRY_TOCLESS (MEMCMP, 4)
	CALL_MCOUNT 3

	/* Lengths above 64 bytes go through the 32-byte-wide compares
	   first; everything else is handled by L(last_compare) alone.  */
	cmpldi	cr6,r5,64
	bgt	cr6,L(loop_head)

/* Compare 64 bytes.  This section is used for lengths <= 64 and for the last
   bytes for larger lengths.  */
L(last_compare):
	li	r8,16

	/* lxvl takes its byte count from the most significant byte of the
	   length register, so both the remaining length (r9) and the
	   16-byte step (r8) are kept shifted left by 56 bits.  */
	sldi	r9,r5,56
	sldi	r8,r8,56
	addi	r6,r3,16
	addi	r7,r4,16

	/* Load up to 16 bytes from each buffer.  */
	lxvl	32+v0,r3,r9
	lxvl	32+v2,r4,r9

	/* The sub. and vcmpneb. results are combined by the crnand in order
	   to do a single branch.  It computes NOT(CR0.GT AND CR6.EQ) into
	   CR0.LT, so the branch is taken when nothing is left to compare
	   after this chunk (r9 is not greater than 0) OR when this chunk
	   holds a differing byte (v4 is not all zeros).  If the branch is
	   taken only because the length ran out, TAIL reads the same
	   position of two equal vectors and therefore returns 0.  */
	sub.	r9,r9,r8
	vcmpneb.  v4,v0,v2
	crnand	4*cr0+lt,4*cr0+gt,4*cr6+eq
	bt	4*cr0+lt,L(tail1)

	addi	r3,r3,32
	addi	r4,r4,32

	/* Second chunk, through r6/r7 (the r3/r4 of the first chunk plus
	   16).  Same combined length/mismatch test as above.  */
	lxvl	32+v1,r6,r9
	lxvl	32+v3,r7,r9
	sub.	r9,r9,r8
	vcmpneb.  v5,v1,v3
	crnand	4*cr0+lt,4*cr0+gt,4*cr6+eq
	bt	4*cr0+lt,L(tail2)

	addi	r6,r3,16
	addi	r7,r4,16

	/* Third chunk, through the r3/r4 advanced by 32 above.  */
	lxvl	32+v6,r3,r9
	lxvl	32+v8,r4,r9
	sub.	r9,r9,r8
	vcmpneb.  v4,v6,v8
	crnand	4*cr0+lt,4*cr0+gt,4*cr6+eq
	bt	4*cr0+lt,L(tail3)

	/* Fourth and last chunk.  No length bookkeeping is needed any
	   more: only a mismatch leaves through L(tail4).  */
	lxvl	32+v7,r6,r9
	lxvl	32+v9,r7,r9
	vcmpneb.  v5,v7,v9
	bne	cr6,L(tail4)

L(finish):
	/* The contents are equal.  */
	li	r3,0
	blr

L(loop_head):
	/* Calculate how many loops to run: r8 = size / 128.  A count of
	   zero (size below 128) skips the main loop.  */
	srdi.	r8,r5,7
	beq	L(loop_tail)
	mtctr	r8

/* Main loop.  Compares 128 bytes each loop.  */
	.p2align 5
L(loop_128):
	COMPARE_32(v0,v2,0,tail1,tail2)
	COMPARE_32(v6,v8,32,tail3,tail4)
	COMPARE_32(v10,v12,64,tail5,tail6)
	COMPARE_32(v14,v16,96,tail7,tail8)

	addi	r3,r3,128
	addi	r4,r4,128
	bdnz	L(loop_128)

	/* Account loop comparisons: keep only the low 7 bits of the
	   length (size modulo 128).  Nothing left means the buffers are
	   equal.  */
	clrldi.  r5,r5,57
	beq	L(finish)

/* Compares 64 bytes if length is still bigger than 64 bytes.  */
	.p2align 5
L(loop_tail):
	cmpldi	r5,64
	ble	L(last_compare)
	COMPARE_32(v0,v2,0,tail1,tail2)
	COMPARE_32(v6,v8,32,tail3,tail4)
	addi	r3,r3,64
	addi	r4,r4,64
	subi	r5,r5,64
	b	L(last_compare)

/* Mismatch exits.  Each one pairs the vcmpneb. mask register (v4 or v5)
   with the two data registers that produced it; TAIL ends in blr, so
   none of these fall through into the next.  */
L(tail1):
	TAIL(v4,v0,v2)

L(tail2):
	TAIL(v5,v1,v3)

L(tail3):
	TAIL(v4,v6,v8)

L(tail4):
	TAIL(v5,v7,v9)

L(tail5):
	TAIL(v4,v10,v12)

L(tail6):
	TAIL(v5,v11,v13)

L(tail7):
	TAIL(v4,v14,v16)

L(tail8):
	TAIL(v5,v15,v17)

END (MEMCMP)
|
|
/* Symbol definitions for the routine above.  The macros are defined
   outside this file; going by their names, bcmp is made a weak alias and
   __memcmpeq a strong alias of memcmp, and memcmp and __memcmpeq get
   hidden definitions.
   NOTE(review): these lines name memcmp directly rather than MEMCMP;
   presumably a build that overrides MEMCMP also redefines these macros
   -- confirm against the including file.  */
libc_hidden_builtin_def (memcmp)
weak_alias (memcmp, bcmp)
strong_alias (memcmp, __memcmpeq)
libc_hidden_def (__memcmpeq)
|