1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-29 11:41:21 +03:00

string: Improve generic strncmp

It follows the strategy:

   - Align the first input to a word boundary using byte operations.

   - If the second input is also word aligned, read a word at a time,
     check for null (using has_zero), and check the final words using
     byte operations.

   - If the second input is not word aligned, loop by aligning the source
     and merging the result of two reads.  As in the aligned case, check
     for null with has_zero, and check the final words using byte
     operations.

Checked on x86_64-linux-gnu, i686-linux-gnu, powerpc64-linux-gnu,
and powerpc-linux-gnu by removing the arch-specific assembly
implementation and disabling multi-arch (it covers both LE and BE
for 64 and 32 bits).

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Adhemerval Zanella
2023-01-20 16:18:33 -03:00
parent 30cf54bf30
commit 367c31b5d6

View File

@ -15,7 +15,12 @@
License along with the GNU C Library; if not, see License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */ <https://www.gnu.org/licenses/>. */
#include <stdint.h>
#include <string-fzb.h>
#include <string-fzc.h>
#include <string-fzi.h>
#include <string.h> #include <string.h>
#include <sys/param.h>
#include <memcopy.h> #include <memcopy.h>
#undef strncmp #undef strncmp
@ -24,51 +29,110 @@
#define STRNCMP strncmp #define STRNCMP strncmp
#endif #endif
/* Final byte-level comparison of the words W1 and W2 with N bytes left
   to compare.

   The byte position is obtained from index_first_zero_ne (declared in the
   string-fz* headers; presumably the index of the first byte that is
   either NUL or differs between the two words -- confirm there).  If that
   position is at or beyond N, the difference lies outside the range the
   caller asked about and 0 is returned; otherwise the result is the
   difference of the bytes extracted from W1 and W2 at that position.  */
static inline int
final_cmp (const op_t w1, const op_t w2, size_t n)
{
  const unsigned int pos = index_first_zero_ne (w1, w2);

  if (pos < n)
    return extractbyte (w1, pos) - extractbyte (w2, pos);

  return 0;
}
/* Aligned loop, used when both inputs can be read a whole word at a time.

   W1 is the word of the first input already loaded by the caller, X1
   points at the word that follows it, X2 points at the current word of
   the second input, and N is the number of bytes still to be compared.

   Words are consumed pairwise while they are equal.  As soon as they
   differ, or no more than one word's worth of bytes remains, the decision
   is delegated to final_cmp.  If an equal pair contains a zero byte
   (per has_zero) the strings are equal and 0 is returned.  */
static inline int
strncmp_aligned_loop (const op_t *x1, const op_t *x2, op_t w1, size_t n)
{
  op_t word2 = *x2++;

  for (;;)
    {
      /* Mismatch, or the remaining length fits in the current word:
	 finish with a byte-level comparison.  */
      if (w1 != word2 || n <= sizeof (op_t))
	return final_cmp (w1, word2, n);

      n -= sizeof (op_t);

      /* The words are equal here, so a zero in W1 ends both strings.  */
      if (has_zero (w1))
	return 0;

      w1 = *x1++;
      word2 = *x2++;
    }
}
/* Unaligned loop, used when P1 is word aligned but P2 is not.

X1 points at the word of P1 that follows W1, which the caller has already
loaded.  X2 points at the aligned word containing the first byte of P2,
OFS is the byte offset of P2 inside that word (non-zero on this path), and
N is the number of bytes still to be compared.

Align the first partial of P2, with 0xff for the rest of the bytes so that
we can also apply the has_zero test to see if we have already reached EOS.
If we have, then we can simply fall through to the final comparison.

NOTE(review): MERGE comes from memcopy.h and has_zero from the string-fz*
headers; their exact semantics are not visible here and are assumed from
the way they are used below -- confirm against those headers.  */
static inline int
strncmp_unaligned_loop (const op_t *x1, const op_t *x2, op_t w1, uintptr_t ofs,
size_t n)
{
op_t w2a = *x2++;
/* Bit counts handed to MERGE: SH_1 covers the OFS bytes of the aligned
word that lie before P2, SH_2 covers the remainder of the word.  */
uintptr_t sh_1 = ofs * CHAR_BIT;
uintptr_t sh_2 = sizeof(op_t) * CHAR_BIT - sh_1;
/* First partial of P2, padded with all-ones bytes (see above).  */
op_t w2 = MERGE (w2a, sh_1, (op_t)-1, sh_2);
/* Only read further words of P2 if the first partial holds no zero byte
and N reaches beyond the sizeof (op_t) - OFS bytes that it provides.  */
if (!has_zero (w2) && n > (sizeof (op_t) - ofs))
{
op_t w2b;
/* Unaligned loop.  The invariant is that W2B, which is "ahead" of W1,
does not contain end-of-string.  Therefore it is safe (and necessary)
to read another word from each while we do not have a difference.  */
while (1)
{
/* Combine the tail of the previous aligned word with the head of the
next one to form the word of P2 that lines up with W1.  */
w2b = *x2++;
w2 = MERGE (w2a, sh_1, w2b, sh_2);
/* A difference, or at most one word of length left: decide bytewise.  */
if (n <= sizeof (op_t) || w1 != w2)
return final_cmp (w1, w2, n);
n -= sizeof(op_t);
/* Stop reading ahead once W2B contains a zero byte, or once the bytes
still to be compared all fit in the part of W2B not yet consumed.  */
if (has_zero (w2b) || n <= (sizeof (op_t) - ofs))
break;
w1 = *x1++;
w2a = w2b;
}
/* Zero found in the second partial of P2.  If we had EOS in the aligned
word, we have equality (W1 and W2 compared equal in the loop above).  */
if (has_zero (w1))
return 0;
/* Load the final word of P1 and align the final partial of P2, this time
padding with zero instead of reading another word of P2.  */
w1 = *x1++;
w2 = MERGE (w2b, sh_1, 0, sh_2);
}
return final_cmp (w1, w2, n);
}
/* Compare no more than N characters of S1 and S2, /* Compare no more than N characters of S1 and S2,
returning less than, equal to or greater than zero returning less than, equal to or greater than zero
if S1 is lexicographically less than, equal to or if S1 is lexicographically less than, equal to or
greater than S2. */ greater than S2. */
int int
STRNCMP (const char *s1, const char *s2, size_t n) STRNCMP (const char *p1, const char *p2, size_t n)
{ {
unsigned char c1 = '\0'; /* Handle the unaligned bytes of p1 first. */
unsigned char c2 = '\0'; uintptr_t a = MIN (-(uintptr_t)p1 % sizeof(op_t), n);
int diff = 0;
if (n >= 4) for (int i = 0; i < a; ++i)
{ {
size_t n4 = n >> 2; unsigned char c1 = *p1++;
do unsigned char c2 = *p2++;
{ diff = c1 - c2;
c1 = (unsigned char) *s1++; if (c1 == '\0' || diff != 0)
c2 = (unsigned char) *s2++; return diff;
if (c1 == '\0' || c1 != c2)
return c1 - c2;
c1 = (unsigned char) *s1++;
c2 = (unsigned char) *s2++;
if (c1 == '\0' || c1 != c2)
return c1 - c2;
c1 = (unsigned char) *s1++;
c2 = (unsigned char) *s2++;
if (c1 == '\0' || c1 != c2)
return c1 - c2;
c1 = (unsigned char) *s1++;
c2 = (unsigned char) *s2++;
if (c1 == '\0' || c1 != c2)
return c1 - c2;
} while (--n4 > 0);
n &= 3;
} }
if (a == n)
return 0;
while (n > 0) /* P1 is now aligned to op_t. P2 may or may not be. */
{ const op_t *x1 = (const op_t *) p1;
c1 = (unsigned char) *s1++; op_t w1 = *x1++;
c2 = (unsigned char) *s2++; uintptr_t ofs = (uintptr_t) p2 % sizeof(op_t);
if (c1 == '\0' || c1 != c2) return ofs == 0
return c1 - c2; ? strncmp_aligned_loop (x1, (const op_t *) p2, w1, n - a)
n--; : strncmp_unaligned_loop (x1, (const op_t *) (p2 - ofs), w1, ofs, n - a);
}
return c1 - c2;
} }
libc_hidden_builtin_def (STRNCMP) libc_hidden_builtin_def (STRNCMP)