[automated_benchmarking] Make arguments optional and add --dict argument (#1968)
* Make arguments optional and add --dict argument
* Removing accidental print statement
* Change to more likely scenario for dictionary compression benchmark
Committed by Yann Collet · parent 9a71d07aa4 · commit 8fe562a770
@@ -238,7 +238,7 @@ versionsTest: clean
 	$(PYTHON) test-zstd-versions.py

 automated_benchmarking: clean
-	$(PYTHON) automated_benchmarking.py golden-compression 1 current 1 "" 60
+	$(PYTHON) automated_benchmarking.py

 checkTag: checkTag.c $(ZSTDDIR)/zstd.h
 	$(CC) $(FLAGS) $< -o $@$(EXT)
@@ -33,21 +33,32 @@ pull requests from the zstd repo and compare facebook:dev to all of them once, c
 will continuously get pull requests from the zstd repo and run benchmarks against facebook:dev.

 ```
-Example usage: python automated_benchmarking.py golden-compression 1 current 1 "" 60
+Example usage: python automated_benchmarking.py
 ```

 ```
-usage: automated_benchmarking.py [-h] directory levels mode emails
+usage: automated_benchmarking.py [-h] [--directory DIRECTORY]
+                                 [--levels LEVELS] [--iterations ITERATIONS]
+                                 [--emails EMAILS] [--frequency FREQUENCY]
+                                 [--mode MODE] [--dict DICT]

-positional arguments:
-  directory   directory with files to benchmark
-  levels      levels to test eg ('1,2,3')
-  mode        'fastmode', 'onetime', 'current' or 'continuous'
-  iterations  number of benchmark iterations to run
-  emails      email addresses of people who will be alerted upon regression.
-              Only for continuous mode
-  frequency   specifies the number of seconds to wait before each successive
-              check for new PRs in continuous mode
+optional arguments:
+  -h, --help            show this help message and exit
+  --directory DIRECTORY
+                        directory with files to benchmark
+  --levels LEVELS       levels to test eg ('1,2,3')
+  --iterations ITERATIONS
+                        number of benchmark iterations to run
+  --emails EMAILS       email addresses of people who will be alerted upon
+                        regression. Only for continuous mode
+  --frequency FREQUENCY
+                        specifies the number of seconds to wait before each
+                        successive check for new PRs in continuous mode
+  --mode MODE           'fastmode', 'onetime', 'current', or 'continuous' (see
+                        README.md for details)
+  --dict DICT           filename of dictionary to use (when set, this
+                        dictionary will be used to compress the files provided
+                        inside --directory)
 ```

 #### `test-zstd-speed.py` - script for testing zstd speed difference between commits
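Since every argument is now optional, running the script bare is equivalent to passing the defaults wired into the argparse setup further down in this diff (`--directory golden-compression --levels 1 --iterations 1 --mode current`). As a purely illustrative example with hypothetical paths, a dictionary run would look like `python automated_benchmarking.py --directory my-corpus --dict my-corpus.dict --levels 1,3 --mode current`.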
@@ -94,16 +94,19 @@ def clone_and_build(build):
     return "../zstd"


+def parse_benchmark_output(output):
+    idx = [i for i, d in enumerate(output) if d == "MB/s"]
+    return [float(output[idx[0] - 1]), float(output[idx[1] - 1])]
+
+
 def benchmark_single(executable, level, filename):
-    tmp = (
+    return parse_benchmark_output((
         subprocess.run(
             [executable, "-qb{}".format(level), filename], stderr=subprocess.PIPE
         )
         .stderr.decode("utf-8")
         .split(" ")
-    )
-    idx = [i for i, d in enumerate(tmp) if d == "MB/s"]
-    return [float(tmp[idx[0] - 1]), float(tmp[idx[1] - 1])]
+    ))


 def benchmark_n(executable, level, filename, n):
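Factoring the "MB/s" token scan out into `parse_benchmark_output` lets the new dictionary benchmark reuse it. A minimal sketch of what the parser does, assuming a stderr line of roughly the shape `zstd -qb` prints (the sample line below is illustrative, not captured output):

```python
# Illustrative only: the exact layout of zstd's -qb stderr line is assumed here.
sample_stderr = " 1#silesia.tar : 211988480 -> 73651590 (2.878), 312.5 MB/s , 723.7 MB/s "

def parse_benchmark_output(output):
    # positions of every "MB/s" token; the token right before each one is a speed
    idx = [i for i, d in enumerate(output) if d == "MB/s"]
    # first hit is the compression speed, second the decompression speed
    return [float(output[idx[0] - 1]), float(output[idx[1] - 1])]

cspeed, dspeed = parse_benchmark_output(sample_stderr.split(" "))
print(cspeed, dspeed)  # 312.5 723.7
```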
@@ -129,6 +132,45 @@ def benchmark(build, filenames, levels, iterations):
     ]


+def benchmark_dictionary_single(executable, filenames_directory, dictionary_filename, level, iterations):
+    cspeeds, dspeeds = [], []
+    for _ in range(iterations):
+        output = subprocess.run([executable, "-qb{}".format(level), "-D", dictionary_filename, "-r", filenames_directory], stderr=subprocess.PIPE).stderr.decode("utf-8").split(" ")
+        cspeed, dspeed = parse_benchmark_output(output)
+        cspeeds.append(cspeed)
+        dspeeds.append(dspeed)
+    max_cspeed, max_dspeed = max(cspeeds), max(dspeeds)
+    print(
+        "Bench (executable={} level={} filenames_directory={}, dictionary_filename={}, iterations={}):\n\t[cspeed: {} MB/s, dspeed: {} MB/s]".format(
+            os.path.basename(executable),
+            level,
+            os.path.basename(filenames_directory),
+            os.path.basename(dictionary_filename),
+            iterations,
+            max_cspeed,
+            max_dspeed,
+        )
+    )
+    return (max_cspeed, max_dspeed)
+
+
+def benchmark_dictionary(build, filenames_directory, dictionary_filename, levels, iterations):
+    executable = clone_and_build(build)
+    return [benchmark_dictionary_single(executable, filenames_directory, dictionary_filename, l, iterations) for l in levels]
+
+
+def parse_regressions_and_labels(old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build):
+    cspeed_reg = (old_cspeed - new_cspeed) / old_cspeed
+    dspeed_reg = (old_dspeed - new_dspeed) / old_dspeed
+    baseline_label = "{}:{} ({})".format(
+        baseline_build["user"], baseline_build["branch"], baseline_build["hash"]
+    )
+    test_label = "{}:{} ({})".format(
+        test_build["user"], test_build["branch"], test_build["hash"]
+    )
+    return cspeed_reg, dspeed_reg, baseline_label, test_label
+
+
 def get_regressions(baseline_build, test_build, iterations, filenames, levels):
     old = benchmark(baseline_build, filenames, levels, iterations)
     new = benchmark(test_build, filenames, levels, iterations)
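`parse_regressions_and_labels` centralises the regression arithmetic now shared by both regression checkers. A worked example with made-up speeds and hypothetical builds (the `user`/`branch`/`hash` keys follow the build descriptions used elsewhere in the script):

```python
# Made-up numbers, purely to show the sign convention: positive means the test build is slower.
old_cspeed, new_cspeed = 312.5, 280.0   # baseline vs. test compression speed, MB/s
old_dspeed, new_dspeed = 723.7, 739.1   # baseline vs. test decompression speed, MB/s

cspeed_reg = (old_cspeed - new_cspeed) / old_cspeed   # 0.104 -> ~10.4% compression regression
dspeed_reg = (old_dspeed - new_dspeed) / old_dspeed   # negative -> decompression actually got faster

# Hypothetical builds in the same shape the script uses.
baseline_build = {"user": "facebook", "branch": "dev", "hash": None}
test_build = {"user": "someuser", "branch": "my-feature", "hash": "abc1234"}
baseline_label = "{}:{} ({})".format(baseline_build["user"], baseline_build["branch"], baseline_build["hash"])
test_label = "{}:{} ({})".format(test_build["user"], test_build["branch"], test_build["hash"])

print(round(cspeed_reg, 3), round(dspeed_reg, 3))  # 0.104 -0.021
print(baseline_label, "->", test_label)            # facebook:dev (None) -> someuser:my-feature (abc1234)
```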
@@ -137,13 +179,8 @@ def get_regressions(baseline_build, test_build, iterations, filenames, levels):
         for k, filename in enumerate(filenames):
             old_cspeed, old_dspeed = old[j][k]
             new_cspeed, new_dspeed = new[j][k]
-            cspeed_reg = (old_cspeed - new_cspeed) / old_cspeed
-            dspeed_reg = (old_dspeed - new_dspeed) / old_dspeed
-            baseline_label = "{}:{} ({})".format(
-                baseline_build["user"], baseline_build["branch"], baseline_build["hash"]
-            )
-            test_label = "{}:{} ({})".format(
-                test_build["user"], test_build["branch"], test_build["hash"]
-            )
+            cspeed_reg, dspeed_reg, baseline_label, test_label = parse_regressions_and_labels(
+                old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build
+            )
             if cspeed_reg > CSPEED_REGRESSION_TOLERANCE:
                 regressions.append(
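For reference, the per-file path indexes `old[j][k]`: `benchmark()` yields one row per level with one `(cspeed, dspeed)` pair per file. An illustrative sketch of that shape (numbers invented):

```python
# Invented numbers; only the nesting matters.
levels = [1, 3]
filenames = ["fileA", "fileB"]
old = [
    [(312.5, 723.7), (295.0, 640.2)],   # level 1: fileA, fileB
    [(118.4, 701.9), (101.3, 655.0)],   # level 3: fileA, fileB
]
old_cspeed, old_dspeed = old[1][0]      # level 3, fileA
print(old_cspeed, old_dspeed)           # 118.4 701.9
```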
@@ -171,14 +208,58 @@ def get_regressions(baseline_build, test_build, iterations, filenames, levels):
             )
     return regressions


-def main(filenames, levels, iterations, builds=None, emails=None, continuous=False, frequency=DEFAULT_MAX_API_CALL_FREQUENCY_SEC):
+def get_regressions_dictionary(baseline_build, test_build, filenames_directory, dictionary_filename, levels, iterations):
+    old = benchmark_dictionary(baseline_build, filenames_directory, dictionary_filename, levels, iterations)
+    new = benchmark_dictionary(test_build, filenames_directory, dictionary_filename, levels, iterations)
+    regressions = []
+    for j, level in enumerate(levels):
+        old_cspeed, old_dspeed = old[j]
+        new_cspeed, new_dspeed = new[j]
+        cspeed_reg, dspeed_reg, baseline_label, test_label = parse_regressions_and_labels(
+            old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build
+        )
+        if cspeed_reg > CSPEED_REGRESSION_TOLERANCE:
+            regressions.append(
+                "[COMPRESSION REGRESSION] (level={} filenames_directory={} dictionary_filename={})\n\t{} -> {}\n\t{} -> {} ({:0.2f}%)".format(
+                    level,
+                    filenames_directory,
+                    dictionary_filename,
+                    baseline_label,
+                    test_label,
+                    old_cspeed,
+                    new_cspeed,
+                    cspeed_reg * 100.0,
+                )
+            )
+        if dspeed_reg > DSPEED_REGRESSION_TOLERANCE:
+            regressions.append(
+                "[DECOMPRESSION REGRESSION] (level={} filenames_directory={} dictionary_filename={})\n\t{} -> {}\n\t{} -> {} ({:0.2f}%)".format(
+                    level,
+                    filenames_directory,
+                    dictionary_filename,
+                    baseline_label,
+                    test_label,
+                    old_dspeed,
+                    new_dspeed,
+                    dspeed_reg * 100.0,
+                )
+            )
+    return regressions
+
+
+def main(filenames, levels, iterations, builds=None, emails=None, continuous=False, frequency=DEFAULT_MAX_API_CALL_FREQUENCY_SEC, dictionary_filename=None):
     if builds == None:
         builds = get_new_open_pr_builds()
     while True:
         for test_build in builds:
-            regressions = get_regressions(
-                MASTER_BUILD, test_build, iterations, filenames, levels
-            )
+            if dictionary_filename == None:
+                regressions = get_regressions(
+                    MASTER_BUILD, test_build, iterations, filenames, levels
+                )
+            else:
+                regressions = get_regressions_dictionary(
+                    MASTER_BUILD, test_build, filenames, dictionary_filename, levels, iterations
+                )
             body = "\n".join(regressions)
             if len(regressions) > 0:
                 if emails != None:
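The dictionary path differs in shape: the whole `--directory` is benchmarked in a single `-r` invocation, so `benchmark_dictionary()` yields one `(cspeed, dspeed)` pair per level and `get_regressions_dictionary` indexes `old[j]` rather than `old[j][k]`. A small sketch (numbers invented):

```python
# Invented numbers; one (cspeed, dspeed) pair per compression level.
levels = [1, 3]
old = [(410.2, 980.5), (150.7, 1001.3)]
new = [(398.8, 975.0), (149.9, 1002.7)]
old_cspeed, old_dspeed = old[1]         # level 3
new_cspeed, new_dspeed = new[1]
print((old_cspeed - new_cspeed) / old_cspeed)   # ~0.0053, well under a typical regression tolerance
```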
@@ -198,42 +279,38 @@ def main(filenames, levels, iterations, builds=None, emails=None, continuous=Fal

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "directory", help="directory with files to benchmark", default="fuzz"
-    )
-    parser.add_argument("levels", help="levels to test eg ('1,2,3')", default="1,2,3")
-    parser.add_argument(
-        "mode", help="'fastmode', 'onetime', 'current' or 'continuous'", default="onetime"
-    )
-    parser.add_argument(
-        "iterations", help="number of benchmark iterations to run", default=5
-    )
-    parser.add_argument(
-        "emails",
-        help="email addresses of people who will be alerted upon regression. Only for continuous mode",
-        default=None,
-    )
-    parser.add_argument(
-        "frequency",
-        help="specifies the number of seconds to wait before each successive check for new PRs in continuous mode",
-        default=DEFAULT_MAX_API_CALL_FREQUENCY_SEC
-    )
+    parser.add_argument("--directory", help="directory with files to benchmark", default="golden-compression")
+    parser.add_argument("--levels", help="levels to test eg ('1,2,3')", default="1")
+    parser.add_argument("--iterations", help="number of benchmark iterations to run", default="1")
+    parser.add_argument("--emails", help="email addresses of people who will be alerted upon regression. Only for continuous mode", default=None)
+    parser.add_argument("--frequency", help="specifies the number of seconds to wait before each successive check for new PRs in continuous mode", default=DEFAULT_MAX_API_CALL_FREQUENCY_SEC)
+    parser.add_argument("--mode", help="'fastmode', 'onetime', 'current', or 'continuous' (see README.md for details)", default="current")
+    parser.add_argument("--dict", help="filename of dictionary to use (when set, this dictionary will be used to compress the files provided inside --directory)", default=None)

     args = parser.parse_args()
-    filenames = glob.glob("{}/**".format(args.directory))
+    filenames = args.directory
     levels = [int(l) for l in args.levels.split(",")]
     mode = args.mode
     iterations = int(args.iterations)
     emails = args.emails
     frequency = int(args.frequency)
+    dictionary_filename = args.dict
+
+    if dictionary_filename == None:
+        filenames = glob.glob("{}/**".format(filenames))
+
+    if (len(filenames) == 0):
+        print("0 files found")
+        quit()

     if mode == "onetime":
-        main(filenames, levels, iterations, frequency=frequency)
+        main(filenames, levels, iterations, frequency=frequency, dictionary_filename=dictionary_filename)
     elif mode == "current":
         builds = [{"user": None, "branch": "None", "hash": None}]
-        main(filenames, levels, iterations, builds, frequency=frequency)
+        main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
     elif mode == "fastmode":
         builds = [{"user": "facebook", "branch": "master", "hash": None}]
-        main(filenames, levels, iterations, builds, frequency=frequency)
+        main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
     else:
-        main(filenames, levels, iterations, None, emails, True, frequency=frequency)
+        main(filenames, levels, iterations, None, emails, True, frequency=frequency, dictionary_filename=dictionary_filename)
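One behavioural note on the `__main__` block: with no `--dict`, the `--directory` value is expanded into an explicit file list before benchmarking; with `--dict`, the directory path is handed to zstd as-is (via `-r` in `benchmark_dictionary_single`). A minimal sketch of that post-parse handling, with a hypothetical directory name:

```python
import glob

directory = "golden-compression"   # hypothetical --directory value
dictionary_filename = None         # None means --dict was not passed

filenames = directory
if dictionary_filename == None:
    # Expand to individual files. Note: without recursive=True, "**" in glob
    # behaves like "*", so this lists the directory's immediate entries.
    filenames = glob.glob("{}/**".format(filenames))

if len(filenames) == 0:
    print("0 files found")
```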