mirror of
https://sourceware.org/git/glibc.git
synced 2025-08-10 05:03:06 +03:00
resolv: Optimize inet_ntop
The benchtests/inet_ntop_ipv4 and benchtests/inet_ntop_ipv6 profiles show that most of the time is spent in costly sprintf operations: $ perf record ./benchtests/bench-inet_ntop_ipv4 && perf report --stdio [...] 38.53% bench-inet_ntop libc.so [.] __printf_buffer 18.69% bench-inet_ntop libc.so [.] __printf_buffer_write 11.01% bench-inet_ntop libc.so [.] _itoa_word 8.02% bench-inet_ntop bench-inet_ntop_ipv4 [.] bench_start 6.99% bench-inet_ntop libc.so [.] __memmove_avx_unaligned_erms 3.86% bench-inet_ntop libc.so [.] __strchrnul_avx2 2.82% bench-inet_ntop libc.so [.] __strcpy_avx2 1.90% bench-inet_ntop libc.so [.] inet_ntop4 1.78% bench-inet_ntop libc.so [.] __vsprintf_internal 1.55% bench-inet_ntop libc.so [.] __sprintf_chk 1.18% bench-inet_ntop libc.so [.] __GI___inet_ntop $ perf record ./benchtests/bench-inet_ntop_ipv6 && perf report --stdio 35.44% bench-inet_ntop libc.so [.] __printf_buffer 14.35% bench-inet_ntop libc.so [.] __printf_buffer_write 10.27% bench-inet_ntop libc.so [.] __GI___inet_ntop 7.93% bench-inet_ntop libc.so [.] _itoa_word 7.00% bench-inet_ntop libc.so [.] __sprintf_chk 6.20% bench-inet_ntop libc.so [.] __vsprintf_internal 5.26% bench-inet_ntop libc.so [.] __strchrnul_avx2 5.05% bench-inet_ntop bench-inet_ntop_ipv6 [.] bench_start 3.70% bench-inet_ntop libc.so [.] __memmove_avx_unaligned_erms 2.11% bench-inet_ntop libc.so [.] __printf_buffer_done A new implementation is used instead: * The printf usage is replaced with an expanded function that prints either an IPv4 octet or an IPv6 quartet; * The strcpy is replaced with a memcpy (since ABIs usually tend to optimize the latter); * For IPv6, the '::' shorthanding is done in-place instead of using a temporary buffer. * A temporary buffer is used only if the size is smaller than INET_ADDRSTRLEN/INET6_ADDRSTRLEN. * Inline is used for both inet_ntop4 and inet_ntop6. The code is significantly rewritten, so I take it this requires a new license. 
The performance results on aarch64 Neoverse1 with gcc 14.2.1: * master aarch64-linux-gnu-master$ ./benchtests/bench-inet_ntop_ipv4 "inet_ntop_ipv4": { "workload-ipv4-random": { "duration": 1.43067e+09, "iterations": 8e+06, "reciprocal-throughput": 178.572, "latency": 179.096, "max-throughput": 5.59997e+06, "min-throughput": 5.58359e+06 } aarch64-linux-gnu-master$ ./benchtests/bench-inet_ntop_ipv6 "inet_ntop_ipv6": { "workload-ipv6-random": { "duration": 1.68539e+09, "iterations": 4e+06, "reciprocal-throughput": 421.307, "latency": 421.388, "max-throughput": 2.37357e+06, "min-throughput": 2.37311e+06 } } * patched aarch64-linux-gnu$ ./benchtests/bench-inet_ntop_ipv4 "inet_ntop_ipv4": { "workload-ipv4-random": { "duration": 1.06133e+09, "iterations": 5.6e+07, "reciprocal-throughput": 18.8482, "latency": 19.0565, "max-throughput": 5.30555e+07, "min-throughput": 5.24755e+07 } } aarch64-linux-gnu$ ./benchtests/bench-inet_ntop_ipv6 "inet_ntop_ipv6": { "workload-ipv6-random": { "duration": 1.01246e+09, "iterations": 2.4e+07, "reciprocal-throughput": 42.5576, "latency": 41.8139, "max-throughput": 2.34976e+07, "min-throughput": 2.39155e+07 } } Checked on aarch64-linux-gnu and x86_64-linux-gnu. Reviewed-by: DJ Delorie <dj@redhat.com>
This commit is contained in:
@@ -1,136 +1,155 @@
|
|||||||
/*
|
/* Convert IPv4/IPv6 addresses from binary to text form.
|
||||||
* Copyright (c) 1996-1999 by Internet Software Consortium.
|
Copyright (C) 1996-2025 Free Software Foundation, Inc.
|
||||||
*
|
This file is part of the GNU C Library.
|
||||||
* Permission to use, copy, modify, and distribute this software for any
|
|
||||||
* purpose with or without fee is hereby granted, provided that the above
|
|
||||||
* copyright notice and this permission notice appear in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
|
|
||||||
* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
|
|
||||||
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
|
|
||||||
* CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
|
||||||
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
|
||||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
|
||||||
* SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <sys/param.h>
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
#include <sys/types.h>
|
modify it under the terms of the GNU Lesser General Public
|
||||||
#include <sys/socket.h>
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
#include <netinet/in.h>
|
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
#include <arpa/nameser.h>
|
#include <arpa/nameser.h>
|
||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <_itoa.h>
|
||||||
|
|
||||||
#ifdef SPRINTF_CHAR
|
static inline char *
|
||||||
# define SPRINTF(x) strlen (sprintf /**/ x)
|
put_uint8 (uint8_t word, char *tp)
|
||||||
#else
|
|
||||||
# define SPRINTF(x) ((size_t) sprintf x)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* WARNING: Don't even consider trying to compile this on a system where
|
|
||||||
* sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX.
|
|
||||||
*/
|
|
||||||
|
|
||||||
static const char *inet_ntop4 (const u_char *src, char *dst, socklen_t size);
|
|
||||||
static const char *inet_ntop6 (const u_char *src, char *dst, socklen_t size);
|
|
||||||
|
|
||||||
/* char *
|
|
||||||
* __inet_ntop(af, src, dst, size)
|
|
||||||
* convert a network format address to presentation format.
|
|
||||||
* return:
|
|
||||||
* pointer to presentation format address (`dst'), or NULL (see errno).
|
|
||||||
* author:
|
|
||||||
* Paul Vixie, 1996.
|
|
||||||
*/
|
|
||||||
const char *
|
|
||||||
__inet_ntop (int af, const void *src, char *dst, socklen_t size)
|
|
||||||
{
|
{
|
||||||
switch (af)
|
int s = 1;
|
||||||
|
if (word >= 10)
|
||||||
{
|
{
|
||||||
case AF_INET:
|
if (word >= 100)
|
||||||
return (inet_ntop4 (src, dst, size));
|
{
|
||||||
case AF_INET6:
|
tp[2] = '0' + word % 10;
|
||||||
return (inet_ntop6 (src, dst, size));
|
word /= 10;
|
||||||
default:
|
s += 1;
|
||||||
__set_errno (EAFNOSUPPORT);
|
|
||||||
return (NULL);
|
|
||||||
}
|
}
|
||||||
/* NOTREACHED */
|
|
||||||
|
tp[1] = '0' + word % 10;
|
||||||
|
word /= 10;
|
||||||
|
s += 1;
|
||||||
|
}
|
||||||
|
*tp = '0' + word;
|
||||||
|
return tp + s;
|
||||||
}
|
}
|
||||||
libc_hidden_def (__inet_ntop)
|
|
||||||
weak_alias (__inet_ntop, inet_ntop)
|
|
||||||
|
|
||||||
/* const char *
|
static inline char *
|
||||||
* inet_ntop4(src, dst, size)
|
put_uint16 (uint16_t word, char *tp)
|
||||||
* format an IPv4 address
|
|
||||||
* return:
|
|
||||||
* `dst' (as a const)
|
|
||||||
* notes:
|
|
||||||
* (1) uses no statics
|
|
||||||
* (2) takes a u_char* not an in_addr as input
|
|
||||||
* author:
|
|
||||||
* Paul Vixie, 1996.
|
|
||||||
*/
|
|
||||||
static const char *
|
|
||||||
inet_ntop4 (const u_char *src, char *dst, socklen_t size)
|
|
||||||
{
|
{
|
||||||
static const char fmt[] = "%u.%u.%u.%u";
|
if (word >= 0x1000)
|
||||||
char tmp[sizeof "255.255.255.255"];
|
*tp++ = _itoa_lower_digits[(word >> 12) & 0xf];
|
||||||
|
if (word >= 0x100)
|
||||||
|
*tp++ = _itoa_lower_digits[(word >> 8) & 0xf];
|
||||||
|
if (word >= 0x10)
|
||||||
|
*tp++ = _itoa_lower_digits[(word >> 4) & 0xf];
|
||||||
|
*tp++ = _itoa_lower_digits[word & 0xf];
|
||||||
|
return tp;
|
||||||
|
}
|
||||||
|
|
||||||
if (SPRINTF ((tmp, fmt, src[0], src[1], src[2], src[3])) >= size)
|
static __always_inline char *
|
||||||
|
inet_ntop4_format (const uint8_t *src, char *dst)
|
||||||
|
{
|
||||||
|
dst = put_uint8 (src[0], dst);
|
||||||
|
*(dst++) = '.';
|
||||||
|
dst = put_uint8 (src[1], dst);
|
||||||
|
*(dst++) = '.';
|
||||||
|
dst = put_uint8 (src[2], dst);
|
||||||
|
*(dst++) = '.';
|
||||||
|
dst = put_uint8 (src[3], dst);
|
||||||
|
*dst++ = '\0';
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline const char *
|
||||||
|
inet_ntop4 (const uint8_t *src, char *dst, socklen_t size)
|
||||||
|
{
|
||||||
|
if (size >= INET_ADDRSTRLEN)
|
||||||
|
{
|
||||||
|
inet_ntop4_format (src, dst);
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
char tmp[INET_ADDRSTRLEN];
|
||||||
|
char *tp = inet_ntop4_format (src, tmp);
|
||||||
|
socklen_t tmp_s = tp - tmp;
|
||||||
|
if (tmp_s > size)
|
||||||
{
|
{
|
||||||
__set_errno (ENOSPC);
|
__set_errno (ENOSPC);
|
||||||
return (NULL);
|
return NULL;
|
||||||
}
|
}
|
||||||
return strcpy (dst, tmp);
|
return memcpy (dst, tmp, tmp_s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* const char *
|
struct best_t
|
||||||
* inet_ntop6(src, dst, size)
|
|
||||||
* convert IPv6 binary address into presentation (printable) format
|
|
||||||
* author:
|
|
||||||
* Paul Vixie, 1996.
|
|
||||||
*/
|
|
||||||
static const char *
|
|
||||||
inet_ntop6 (const u_char *src, char *dst, socklen_t size)
|
|
||||||
{
|
{
|
||||||
/*
|
int base;
|
||||||
* Note that int32_t and int16_t need only be "at least" large enough
|
int len;
|
||||||
* to contain a value of the specified size. On some systems, like
|
};
|
||||||
* Crays, there is no such thing as an integer variable with 16 bits.
|
|
||||||
* Keep this in mind if you think this function should have been coded
|
|
||||||
* to use pointer overlays. All the world's not a VAX.
|
|
||||||
*/
|
|
||||||
char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"], *tp;
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
int base, len;
|
|
||||||
} best, cur;
|
|
||||||
u_int words[NS_IN6ADDRSZ / NS_INT16SZ];
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/*
|
static inline uint16_t
|
||||||
* Preprocess:
|
in6_addr_addr16 (const struct in6_addr *src, int idx)
|
||||||
* Copy the input (bytewise) array into a wordwise array.
|
{
|
||||||
* Find the longest run of 0x00's in src[] for :: shorthanding.
|
const struct { uint16_t x; } __attribute__((__packed__)) *pptr =
|
||||||
*/
|
(typeof(pptr))(&src->s6_addr16[idx]);
|
||||||
memset (words, '\0', sizeof words);
|
return ntohs (pptr->x);
|
||||||
for (i = 0; i < NS_IN6ADDRSZ; i += 2)
|
}
|
||||||
words[i / 2] = (src[i] << 8) | src[i + 1];
|
|
||||||
best.base = -1;
|
static __always_inline char *
|
||||||
cur.base = -1;
|
inet_ntop6_format (const struct in6_addr *src, struct best_t best, char *dst)
|
||||||
best.len = 0;
|
{
|
||||||
cur.len = 0;
|
char *tp = dst;
|
||||||
for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++)
|
for (int i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++)
|
||||||
{
|
{
|
||||||
if (words[i] == 0)
|
/* Are we inside the best run of 0x00's? */
|
||||||
|
if (best.base != -1 && i >= best.base && i < (best.base + best.len))
|
||||||
|
{
|
||||||
|
if (i == best.base)
|
||||||
|
*tp++ = ':';
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* Are we following an initial run of 0x00s or any real hex? */
|
||||||
|
if (i != 0)
|
||||||
|
*tp++ = ':';
|
||||||
|
/* Is this address an encapsulated IPv4? */
|
||||||
|
if (i == 6 && best.base == 0
|
||||||
|
&& (best.len == 6 || (best.len == 5
|
||||||
|
&& in6_addr_addr16 (src, 5) == 0xffff)))
|
||||||
|
{
|
||||||
|
if (!inet_ntop4 (src->s6_addr + 12, tp,
|
||||||
|
INET6_ADDRSTRLEN - (tp - dst)))
|
||||||
|
return NULL;
|
||||||
|
tp += strlen (tp);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
tp = put_uint16 (in6_addr_addr16 (src, i), tp);
|
||||||
|
}
|
||||||
|
/* Was it a trailing run of 0x00's? */
|
||||||
|
if (best.base != -1 && (best.base + best.len) == (NS_IN6ADDRSZ / NS_INT16SZ))
|
||||||
|
*tp++ = ':';
|
||||||
|
*tp++ = '\0';
|
||||||
|
|
||||||
|
return tp;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline const char *
|
||||||
|
inet_ntop6 (const struct in6_addr *src, char *dst, socklen_t size)
|
||||||
|
{
|
||||||
|
struct best_t best = { -1, 0 }, cur = { -1, 0 };
|
||||||
|
|
||||||
|
/* Find the longest run of 0x00's in src[] for :: shorthanding. */
|
||||||
|
for (int i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++)
|
||||||
|
{
|
||||||
|
if (in6_addr_addr16 (src, i) == 0)
|
||||||
{
|
{
|
||||||
if (cur.base == -1)
|
if (cur.base == -1)
|
||||||
cur.base = i, cur.len = 1;
|
cur.base = i, cur.len = 1;
|
||||||
@@ -155,45 +174,37 @@ inet_ntop6 (const u_char *src, char *dst, socklen_t size)
|
|||||||
if (best.base != -1 && best.len < 2)
|
if (best.base != -1 && best.len < 2)
|
||||||
best.base = -1;
|
best.base = -1;
|
||||||
|
|
||||||
/*
|
if (size >= INET6_ADDRSTRLEN)
|
||||||
* Format the result.
|
|
||||||
*/
|
|
||||||
tp = tmp;
|
|
||||||
for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++)
|
|
||||||
{
|
{
|
||||||
/* Are we inside the best run of 0x00's? */
|
inet_ntop6_format (src, best, dst);
|
||||||
if (best.base != -1 && i >= best.base && i < (best.base + best.len))
|
return dst;
|
||||||
{
|
|
||||||
if (i == best.base)
|
|
||||||
*tp++ = ':';
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
/* Are we following an initial run of 0x00s or any real hex? */
|
|
||||||
if (i != 0)
|
|
||||||
*tp++ = ':';
|
|
||||||
/* Is this address an encapsulated IPv4? */
|
|
||||||
if (i == 6 && best.base == 0
|
|
||||||
&& (best.len == 6 || (best.len == 5 && words[5] == 0xffff)))
|
|
||||||
{
|
|
||||||
if (!inet_ntop4 (src + 12, tp, sizeof tmp - (tp - tmp)))
|
|
||||||
return (NULL);
|
|
||||||
tp += strlen (tp);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
tp += SPRINTF ((tp, "%x", words[i]));
|
|
||||||
}
|
|
||||||
/* Was it a trailing run of 0x00's? */
|
|
||||||
if (best.base != -1 && (best.base + best.len) == (NS_IN6ADDRSZ / NS_INT16SZ))
|
|
||||||
*tp++ = ':';
|
|
||||||
*tp++ = '\0';
|
|
||||||
|
|
||||||
/*
|
char tmp[INET6_ADDRSTRLEN];
|
||||||
* Check for overflow, copy, and we're done.
|
char *tp = inet_ntop6_format (src, best, tmp);
|
||||||
*/
|
|
||||||
if ((socklen_t) (tp - tmp) > size)
|
socklen_t tmp_s = tp - tmp;
|
||||||
|
if (tmp_s > size)
|
||||||
{
|
{
|
||||||
__set_errno (ENOSPC);
|
__set_errno (ENOSPC);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
return strcpy (dst, tmp);
|
return memcpy (dst, tmp, tmp_s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char *
|
||||||
|
__inet_ntop (int af, const void *src, char *dst, socklen_t size)
|
||||||
|
{
|
||||||
|
switch (af)
|
||||||
|
{
|
||||||
|
case AF_INET:
|
||||||
|
return (inet_ntop4 (src, dst, size));
|
||||||
|
case AF_INET6:
|
||||||
|
return (inet_ntop6 (src, dst, size));
|
||||||
|
default:
|
||||||
|
__set_errno (EAFNOSUPPORT);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
libc_hidden_def (__inet_ntop)
|
||||||
|
weak_alias (__inet_ntop, inet_ntop)
|
||||||
|
Reference in New Issue
Block a user