/* Mirror of https://sourceware.org/git/glibc.git
   (synced 2025-11-03 20:53:13 +03:00).

   Commit note: Also fixed the following whitespace nits to satisfy the
   push:
     sysdeps/alpha/alphaev6/memset.S:142: space before tab in indent.
     sysdeps/alpha/configure:1: new blank line at EOF.
     sysdeps/alpha/fpu/e_sqrt.c:126: space before tab in indent.
     sysdeps/alpha/preconfigure:1: new blank line at EOF.
     sysdeps/unix/sysv/linux/alpha/syscalls.list:1: new blank line at EOF.

   File listing: 175 lines, 4.4 KiB, C.  */

/* Copyright (C) 2010-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include <string.h>
/* Machine word used for word-at-a-time scanning.  The search routine in
   this file shifts it by 32 bits, so it is expected to be 64 bits wide.  */
typedef unsigned long word;

/* Load the aligned word containing the byte at S.  The low three bits
   of the address are cleared before the load, so the access starts on
   an 8-byte boundary at or below S.  */
static inline word
ldq_u(const void *s)
{
  return *(const word *)((word)s & -8);
}

/* Branch hint: tell the compiler that condition X is expected to be 0.  */
#define unlikely(X)	__builtin_expect ((X), 0)
/* Request a prefetch for reading (second argument 0) of address X.  */
#define prefetch(X)	__builtin_prefetch ((void *)(X), 0)

/* Alpha CMPBGE of zero against X.  The result is used below as a
   per-byte bit mask: bit i set means byte i of X is zero.  */
#define cmpbeq0(X)	__builtin_alpha_cmpbge(0, (X))
/* Bit mask of the byte positions at which words X and Y are equal:
   the XOR is zero exactly in those bytes.  */
#define find(X, Y)	cmpbeq0 ((X) ^ (Y))

/* Search no more than N bytes of S for C.  */
 | 
						|
 | 
						|
void *
 | 
						|
__memchr (const void *s, int xc, size_t n)
 | 
						|
{
 | 
						|
  const word *s_align;
 | 
						|
  word t, current, found, mask, offset;
 | 
						|
 | 
						|
  if (unlikely (n == 0))
 | 
						|
    return 0;
 | 
						|
 | 
						|
  current = ldq_u (s);
 | 
						|
 | 
						|
  /* Replicate low byte of XC into all bytes of C.  */
 | 
						|
  t = xc & 0xff;			/* 0000000c */
 | 
						|
  t = (t << 8) | t;			/* 000000cc */
 | 
						|
  t = (t << 16) | t;			/* 0000cccc */
 | 
						|
  const word c = (t << 32) | t;		/* cccccccc */
 | 
						|
 | 
						|
  /* Align the source, and decrement the count by the number
 | 
						|
     of bytes searched in the first word.  */
 | 
						|
  s_align = (const word *)((word)s & -8);
 | 
						|
  n += ((word)s & 7);
 | 
						|
 | 
						|
  /* Deal with misalignment in the first word for the comparison.  */
 | 
						|
  mask = (1ul << ((word)s & 7)) - 1;
 | 
						|
 | 
						|
  /* If the entire string fits within one word, we may need masking
 | 
						|
     at both the front and the back of the string.  */
 | 
						|
  if (unlikely (n <= 8))
 | 
						|
    {
 | 
						|
      mask |= -1ul << n;
 | 
						|
      goto last_quad;
 | 
						|
    }
 | 
						|
 | 
						|
  found = find (current, c) & ~mask;
 | 
						|
  if (unlikely (found))
 | 
						|
    goto found_it;
 | 
						|
 | 
						|
  s_align++;
 | 
						|
  n -= 8;
 | 
						|
 | 
						|
  /* If the block is sufficiently large, align to cacheline and prefetch.  */
 | 
						|
  if (unlikely (n >= 256))
 | 
						|
    {
 | 
						|
      /* Prefetch 3 cache lines beyond the one we're working on.  */
 | 
						|
      prefetch (s_align + 8);
 | 
						|
      prefetch (s_align + 16);
 | 
						|
      prefetch (s_align + 24);
 | 
						|
 | 
						|
      while ((word)s_align & 63)
 | 
						|
	{
 | 
						|
	  current = *s_align;
 | 
						|
	  found = find (current, c);
 | 
						|
	  if (found)
 | 
						|
	    goto found_it;
 | 
						|
	  s_align++;
 | 
						|
	  n -= 8;
 | 
						|
	}
 | 
						|
 | 
						|
	/* Within each cacheline, advance the load for the next word
 | 
						|
	   before the test for the previous word is complete.  This
 | 
						|
	   allows us to hide the 3 cycle L1 cache load latency.  We
 | 
						|
	   only perform this advance load within a cacheline to prevent
 | 
						|
	   reading across page boundary.  */
 | 
						|
#define CACHELINE_LOOP				\
 | 
						|
	do {					\
 | 
						|
	  word i, next = s_align[0];		\
 | 
						|
	  for (i = 0; i < 7; ++i)		\
 | 
						|
	    {					\
 | 
						|
	      current = next;			\
 | 
						|
	      next = s_align[1];		\
 | 
						|
	      found = find (current, c);	\
 | 
						|
	      if (unlikely (found))		\
 | 
						|
		goto found_it;			\
 | 
						|
	      s_align++;			\
 | 
						|
	    }					\
 | 
						|
	  current = next;			\
 | 
						|
	  found = find (current, c);		\
 | 
						|
	  if (unlikely (found))			\
 | 
						|
	    goto found_it;			\
 | 
						|
	  s_align++;				\
 | 
						|
	  n -= 64;				\
 | 
						|
	} while (0)
 | 
						|
 | 
						|
      /* While there's still lots more data to potentially be read,
 | 
						|
	 continue issuing prefetches for the 4th cacheline out.  */
 | 
						|
      while (n >= 256)
 | 
						|
	{
 | 
						|
	  prefetch (s_align + 24);
 | 
						|
	  CACHELINE_LOOP;
 | 
						|
	}
 | 
						|
 | 
						|
      /* Up to 3 cache lines remaining.  Continue issuing advanced
 | 
						|
	 loads, but stop prefetching.  */
 | 
						|
      while (n >= 64)
 | 
						|
	CACHELINE_LOOP;
 | 
						|
 | 
						|
      /* We may have exhausted the buffer.  */
 | 
						|
      if (n == 0)
 | 
						|
	return NULL;
 | 
						|
    }
 | 
						|
 | 
						|
  /* Quadword aligned loop.  */
 | 
						|
  current = *s_align;
 | 
						|
  while (n > 8)
 | 
						|
    {
 | 
						|
      found = find (current, c);
 | 
						|
      if (unlikely (found))
 | 
						|
	goto found_it;
 | 
						|
      current = *++s_align;
 | 
						|
      n -= 8;
 | 
						|
    }
 | 
						|
 | 
						|
  /* The last word may need masking at the tail of the compare.  */
 | 
						|
  mask = -1ul << n;
 | 
						|
 last_quad:
 | 
						|
  found = find (current, c) & ~mask;
 | 
						|
  if (found == 0)
 | 
						|
    return NULL;
 | 
						|
 | 
						|
 found_it:
 | 
						|
#ifdef __alpha_cix__
 | 
						|
  offset = __builtin_alpha_cttz (found);
 | 
						|
#else
 | 
						|
  /* Extract LSB.  */
 | 
						|
  found &= -found;
 | 
						|
 | 
						|
  /* Binary search for the LSB.  */
 | 
						|
  offset  = (found & 0x0f ? 0 : 4);
 | 
						|
  offset += (found & 0x33 ? 0 : 2);
 | 
						|
  offset += (found & 0x55 ? 0 : 1);
 | 
						|
#endif
 | 
						|
 | 
						|
  return (void *)((word)s_align + offset);
 | 
						|
}
 | 
						|
 | 
						|
#ifdef weak_alias
 | 
						|
weak_alias (__memchr, memchr)
 | 
						|
#endif
 | 
						|
libc_hidden_builtin_def (memchr)
 |