1
0
mirror of https://github.com/facebook/zstd.git synced 2025-07-29 11:21:22 +03:00

[versions-test] Work around bug in dictionary builder for older versions

Older versions of zstandard have a bug in the dictionary builder that
can cause dictionary building to fail. The process still exits 0, but
the dictionary is not created.

For reference, the bug is that, in the process of writing the dictionary
header, it creates a dictionary that starts with the zstd dictionary magic
even though the header isn't fully written yet. zstd fails compressions in
this case, because the dictionary is malformed. We fixed this later on
by trying to load the dictionary as a zstd dictionary, and if that fails
we fall back to content-only (by default).

The fix is to:
1. Make the dictionary deterministic by sorting the input files.
   Previously the bug would only sometimes occur, when the input files
   were in a particular order.
2. If dictionary creation fails, fall back to the `head` dictionary.
This commit is contained in:
Nick Terrell
2023-01-19 12:21:31 -08:00
committed by Nick Terrell
parent 666944fbe6
commit 667eb6d4fd

View File

@ -85,18 +85,23 @@ def get_git_tags():
return tags
def create_dict(tag, dict_source_path):
def create_dict(tag, dict_source_path, fallback_tag=None):
dict_name = 'dict.' + tag
if not os.path.isfile(dict_name):
cFiles = glob.glob(dict_source_path + "/*.c")
hFiles = glob.glob(dict_source_path + "/*.h")
# Ensure the dictionary builder is deterministic
files = sorted(cFiles + hFiles)
if tag == 'v0.5.0':
result = execute('./dictBuilder.' + tag + ' ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True)
result = execute('./dictBuilder.' + tag + ' ' + ' '.join(files) + ' -o ' + dict_name, print_output=False, param_shell=True)
else:
result = execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True)
if result == 0:
result = execute('./zstd.' + tag + ' -f --train ' + ' '.join(files) + ' -o ' + dict_name, print_output=False, param_shell=True)
if result == 0 and os.path.isfile(dict_name):
print(dict_name + ' created')
assert os.path.isfile(dict_name)
elif fallback_tag is not None:
fallback_dict_name = 'dict.' + fallback_tag
print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
shutil.copy(fallback_dict_name, dict_name)
else:
raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')
else:
@ -272,10 +277,11 @@ if __name__ == '__main__':
print('Compress test.dat by all released zstd')
print('-----------------------------------------------')
create_dict(head, dict_source_path)
for tag in tags:
print(tag)
if tag >= 'v0.5.0':
create_dict(tag, dict_source_path)
create_dict(tag, dict_source_path, head)
dict_compress_sample(tag, test_dat)
remove_duplicates()
decompress_dict(tag)