mirror of
https://sourceware.org/git/glibc.git
synced 2025-07-28 00:21:52 +03:00
Framework for performance benchmarking of functions
See benchtests/Makefile to know how to use it.
This commit is contained in:
17
ChangeLog
17
ChangeLog
@ -1,3 +1,20 @@
|
|||||||
|
2013-03-15 Siddhesh Poyarekar <siddhesh@redhat.com>
|
||||||
|
Richard Henderson <rth@redhat.com>
|
||||||
|
Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
|
||||||
|
|
||||||
|
* Makefile.in (bench): New target.
|
||||||
|
* NEWS: Mention the benchmark framework.
|
||||||
|
* Rules (bench): Likewise.
|
||||||
|
(binaries-bench): Generate binaries for functions to
|
||||||
|
benchmark.
|
||||||
|
* benchtests/Makefile: New makefile for benchmark tests.
|
||||||
|
* benchtests/bench-skeleton.c: New skeleton file for benchmark
|
||||||
|
programs.
|
||||||
|
* benchtests/exp-inputs: New input file for EXP function.
|
||||||
|
* benchtests/pow-inputs: New input file for POW function.
|
||||||
|
* scripts/bench.pl: New script to generate source files for
|
||||||
|
benchmark programs.
|
||||||
|
|
||||||
2013-03-15 Siddhesh Poyarekar <siddhesh@redhat.com>
|
2013-03-15 Siddhesh Poyarekar <siddhesh@redhat.com>
|
||||||
|
|
||||||
* sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
|
* sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
|
||||||
|
@ -3,7 +3,7 @@ srcdir = @srcdir@
|
|||||||
# Uncomment the line below if you want to do parallel build.
|
# Uncomment the line below if you want to do parallel build.
|
||||||
# PARALLELMFLAGS = -j 4
|
# PARALLELMFLAGS = -j 4
|
||||||
|
|
||||||
.PHONY: all install
|
.PHONY: all install bench
|
||||||
|
|
||||||
all .DEFAULT:
|
all .DEFAULT:
|
||||||
$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
|
$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
|
||||||
@ -11,3 +11,6 @@ all .DEFAULT:
|
|||||||
install:
|
install:
|
||||||
LANGUAGE=C LC_ALL=C; export LANGUAGE LC_ALL; \
|
LANGUAGE=C LC_ALL=C; export LANGUAGE LC_ALL; \
|
||||||
$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
|
$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
|
||||||
|
|
||||||
|
bench:
|
||||||
|
$(MAKE) -C $(srcdir)/benchtests $(PARALLELMFLAGS) objdir=`pwd` $@
|
||||||
|
2
NEWS
2
NEWS
@ -23,6 +23,8 @@ Version 2.18
|
|||||||
* Support for priority inherited mutexes in pthread condition variables on
|
* Support for priority inherited mutexes in pthread condition variables on
|
||||||
non-x86 architectures.
|
non-x86 architectures.
|
||||||
|
|
||||||
|
* Added a benchmark framework to track performance of functions in glibc.
|
||||||
|
|
||||||
|
|
||||||
Version 2.17
|
Version 2.17
|
||||||
|
|
||||||
|
27
Rules
27
Rules
@ -83,7 +83,7 @@ common-generated += dummy.o dummy.c
|
|||||||
|
|
||||||
# This makes all the auxiliary and test programs.
|
# This makes all the auxiliary and test programs.
|
||||||
|
|
||||||
.PHONY: others tests
|
.PHONY: others tests bench
|
||||||
ifeq ($(multi-arch),no)
|
ifeq ($(multi-arch),no)
|
||||||
tests := $(filter-out $(tests-ifunc), $(tests))
|
tests := $(filter-out $(tests-ifunc), $(tests))
|
||||||
xtests := $(filter-out $(xtests-ifunc), $(xtests))
|
xtests := $(filter-out $(xtests-ifunc), $(xtests))
|
||||||
@ -188,6 +188,31 @@ $(objpfx)%.out: /dev/null $(objpfx)% # Make it 2nd arg for canned sequence.
|
|||||||
$(make-test-out) > $@
|
$(make-test-out) > $@
|
||||||
|
|
||||||
endif # tests
|
endif # tests
|
||||||
|
|
||||||
|
# Build and run benchmark programs.
|
||||||
|
binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
|
||||||
|
|
||||||
|
run-bench = $(test-wrapper-env) \
|
||||||
|
GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \
|
||||||
|
$($*-ENV) $(run-via-rtld-prefix) $${run}
|
||||||
|
|
||||||
|
bench: $(binaries-bench)
|
||||||
|
if [ -f $(objpfx)bench.out ]; then \
|
||||||
|
mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
|
||||||
|
fi
|
||||||
|
for run in $^; do \
|
||||||
|
eval $(run-bench) >> $(objpfx)bench.out; \
|
||||||
|
done
|
||||||
|
|
||||||
|
$(binaries-bench): %: %.o \
|
||||||
|
$(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
|
||||||
|
$(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
|
||||||
|
$(+link)
|
||||||
|
|
||||||
|
$(objpfx)bench-%.c: %-inputs bench-skeleton.c
|
||||||
|
$(..)scripts/bench.pl $(patsubst %-inputs,%,$<) \
|
||||||
|
$($*-ITER) $($*-ARGLIST) $($*-RET) > $@
|
||||||
|
|
||||||
|
|
||||||
.PHONY: distclean realclean subdir_distclean subdir_realclean \
|
.PHONY: distclean realclean subdir_distclean subdir_realclean \
|
||||||
subdir_clean subdir_mostlyclean subdir_testclean
|
subdir_clean subdir_mostlyclean subdir_testclean
|
||||||
|
59
benchtests/Makefile
Normal file
59
benchtests/Makefile
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
# This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
# The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
# The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with the GNU C Library; if not, see
|
||||||
|
# <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
# Makefile for benchmark tests. The only useful target here is `bench`.
|
||||||
|
|
||||||
|
# Adding a new function `foo`:
|
||||||
|
# ---------------------------
|
||||||
|
|
||||||
|
# - Append the function name to the bench variable
|
||||||
|
|
||||||
|
# - Define foo-ITER with the number of iterations you want to run. Keep it
|
||||||
|
# high enough that the overhead of clock_gettime is only a small fraction of
|
||||||
|
# the total run time of the test.
|
||||||
|
|
||||||
|
# - Define foo-ARGLIST as a colon separated list of types of the input
|
||||||
|
# arguments. Use `void` if function does not take any inputs. Put in quotes
|
||||||
|
# if the input argument is a pointer, e.g.:
|
||||||
|
|
||||||
|
# malloc-ARGLIST = "void *"
|
||||||
|
|
||||||
|
# - Define foo-RET as the type the function returns. Skip if the function
|
||||||
|
# returns void. One could even skip foo-ARGLIST if the function does not
|
||||||
|
# take any inputs AND the function returns void.
|
||||||
|
|
||||||
|
|
||||||
|
# - Make a file called `foo-inputs` with one input value per line, an input
|
||||||
|
# being a comma separated list of arguments to be passed into the function.
|
||||||
|
# See pow-inputs for an example.
|
||||||
|
|
||||||
|
subdir := benchtests
|
||||||
|
bench := exp pow
|
||||||
|
|
||||||
|
exp-ITER = 100000
|
||||||
|
exp-ARGLIST = double
|
||||||
|
exp-RET = double
|
||||||
|
LDFLAGS-bench-exp = -lm
|
||||||
|
|
||||||
|
pow-ITER = 100000
|
||||||
|
pow-ARGLIST = double:double
|
||||||
|
pow-RET = double
|
||||||
|
LDFLAGS-bench-pow = -lm
|
||||||
|
|
||||||
|
include ../Makeconfig
|
||||||
|
include ../Rules
|
75
benchtests/bench-skeleton.c
Normal file
75
benchtests/bench-skeleton.c
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
/* Skeleton for benchmark programs.
|
||||||
|
Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
|
||||||
|
int
|
||||||
|
main (int argc, char **argv)
|
||||||
|
{
|
||||||
|
unsigned long i, j, k;
|
||||||
|
uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
|
||||||
|
struct timespec start, end;
|
||||||
|
|
||||||
|
memset (&start, 0, sizeof (start));
|
||||||
|
memset (&end, 0, sizeof (end));
|
||||||
|
|
||||||
|
clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);
|
||||||
|
|
||||||
|
/* Measure 1000 times the resolution of the clock. So for a 1ns resolution
|
||||||
|
clock, we measure 1000 iterations of the function call at a time.
|
||||||
|
Measurements close to the minimum clock resolution won't make much sense,
|
||||||
|
but it's better than having nothing at all. */
|
||||||
|
unsigned long iters = 1000 * start.tv_nsec;
|
||||||
|
unsigned long total_iters = ITER / iters;
|
||||||
|
|
||||||
|
for (i = 0; i < NUM_SAMPLES; i++)
|
||||||
|
{
|
||||||
|
for (j = 0; j < total_iters; j ++)
|
||||||
|
{
|
||||||
|
clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
|
||||||
|
for (k = 0; k < iters; k++)
|
||||||
|
BENCH_FUNC(i);
|
||||||
|
clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
|
||||||
|
|
||||||
|
uint64_t cur = (end.tv_nsec - start.tv_nsec
|
||||||
|
+ ((end.tv_sec - start.tv_sec)
|
||||||
|
* (uint64_t) 1000000000));
|
||||||
|
|
||||||
|
if (cur > max)
|
||||||
|
max = cur;
|
||||||
|
|
||||||
|
if (cur < min)
|
||||||
|
min = cur;
|
||||||
|
|
||||||
|
total += cur;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double d_total_s = total * 1e-9;
|
||||||
|
double d_iters = iters;
|
||||||
|
double d_total_i = (double)ITER * NUM_SAMPLES;
|
||||||
|
printf (FUNCNAME ": ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
|
||||||
|
d_total_i, d_total_s, max / d_iters, min / d_iters,
|
||||||
|
d_total_i / d_total_s);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
1
benchtests/exp-inputs
Normal file
1
benchtests/exp-inputs
Normal file
@ -0,0 +1 @@
|
|||||||
|
708.00096423260981737257679924368858
|
1
benchtests/pow-inputs
Normal file
1
benchtests/pow-inputs
Normal file
@ -0,0 +1 @@
|
|||||||
|
1.0000000000000020, 1.5
|
93
scripts/bench.pl
Executable file
93
scripts/bench.pl
Executable file
@ -0,0 +1,93 @@
|
|||||||
|
#! /usr/bin/perl -w
|
||||||
|
# Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
# This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
# The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
# License as published by the Free Software Foundation; either
|
||||||
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
# The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# Lesser General Public License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with the GNU C Library; if not, see
|
||||||
|
# <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
# Generate a benchmark source file for a given input.
|
||||||
|
|
||||||
|
# Command line:
#   bench.pl <function> <iterations> [parameter types] [return type]
#
# <parameter types> is a colon separated list of C types, or "void" (or
# absent) for a function without arguments.  <return type> defaults to
# "void".  When the function takes arguments, one initializer per line is
# read from the file "<function>-inputs" in the current directory.  The
# generated C source is written to standard output.
if (@ARGV < 2) {
  die "Usage: bench.pl <function> <iterations> [parameter types] [return type]\n";
}

my $func = $ARGV[0];
my $iters = $ARGV[1];
my @args;
my $ret = "void";
my $getret = "";

if (@ARGV >= 3) {
  @args = split(':', $ARGV[2]);
}

# Honour the return type whenever it is present, not only when there are
# exactly four arguments.
if (@ARGV >= 4) {
  $ret = $ARGV[3];
}

my $decl = "extern $ret $func (";

if (@args == 0 || $args[0] eq "void") {
  # No inputs: a single sample that simply calls the function.
  print "$decl void);\n";
  print "#define CALL_BENCH_FUNC(j) $func();\n";
  print "#define NUM_SAMPLES (1)\n";
}
else {
  my @indices = 0 .. $#args;

  # Prototype, call macro and input structure all have one entry per
  # parameter type, in order; argument N is stored in member argN.
  $decl .= join(",", map { " $_" } @args);
  my $bench_func = "#define CALL_BENCH_FUNC(j) $func ("
    . join(",", map { " in[j].arg$_" } @indices);
  my $struct = "struct args {"
    . join("", map { " $args[$_] arg$_;" } @indices);

  print "$decl);\n";
  print "$bench_func);\n";
  print "$struct } in[] = {";

  my $inputs_file = "$func-inputs";
  open(my $inputs, '<', $inputs_file)
    or die "bench.pl: cannot open $inputs_file: $!\n";

  while (my $line = <$inputs>) {
    chomp $line;
    # A blank line would otherwise become an empty "{}" sample.
    next if $line =~ /^\s*$/;
    print "{$line},\n";
  }

  close($inputs)
    or die "bench.pl: error reading $inputs_file: $!\n";

  print "};\n";
  print "#define NUM_SAMPLES (sizeof (in) / sizeof (struct args))\n";
}

# In some cases not storing a return value seems to result in the function call
# being optimized out.
if ($ret ne "void") {
  # No explicit initializer: a static object is zero-initialized anyway, and
  # "= 0.0" is not valid C for every return type (e.g. pointers).
  print "static volatile $ret ret;\n";
  $getret = "ret = ";
}

print "#define BENCH_FUNC(j) ({$getret CALL_BENCH_FUNC (j);})\n";

print "#define ITER $iters\n";
print "#define FUNCNAME \"$func\"\n";
print "#include \"bench-skeleton.c\"\n";
|
Reference in New Issue
Block a user