mirror of
https://github.com/facebook/zstd.git
synced 2025-07-29 11:21:22 +03:00
[versions-test] Work around bug in dictionary builder for older versions
Older versions of zstandard have a bug in the dictionary builder that can cause dictionary building to fail. The process still exits 0, but the dictionary is not created. For reference, the bug is that it creates a dictionary that starts with the zstd dictionary magic, in the process of writing the dictionary header, but the header isn't fully written yet, and zstd fails compressions in this case because the dictionary is malformed. We fixed this later on by trying to load the dictionary as a zstd dictionary, but if that fails we fall back to content only (by default). The fix is to: 1. Make the dictionary deterministic by sorting the input files. Previously the bug would only sometimes occur, when the input files were in a particular order. 2. If dictionary creation fails, fall back to the `head` dictionary.
This commit is contained in:
committed by
Nick Terrell
parent
666944fbe6
commit
667eb6d4fd
@ -85,18 +85,23 @@ def get_git_tags():
|
||||
return tags
|
||||
|
||||
|
||||
def create_dict(tag, dict_source_path):
|
||||
def create_dict(tag, dict_source_path, fallback_tag=None):
|
||||
dict_name = 'dict.' + tag
|
||||
if not os.path.isfile(dict_name):
|
||||
cFiles = glob.glob(dict_source_path + "/*.c")
|
||||
hFiles = glob.glob(dict_source_path + "/*.h")
|
||||
# Ensure the dictionary builder is deterministic
|
||||
files = sorted(cFiles + hFiles)
|
||||
if tag == 'v0.5.0':
|
||||
result = execute('./dictBuilder.' + tag + ' ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True)
|
||||
result = execute('./dictBuilder.' + tag + ' ' + ' '.join(files) + ' -o ' + dict_name, print_output=False, param_shell=True)
|
||||
else:
|
||||
result = execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True)
|
||||
if result == 0:
|
||||
result = execute('./zstd.' + tag + ' -f --train ' + ' '.join(files) + ' -o ' + dict_name, print_output=False, param_shell=True)
|
||||
if result == 0 and os.path.isfile(dict_name):
|
||||
print(dict_name + ' created')
|
||||
assert os.path.isfile(dict_name)
|
||||
elif fallback_tag is not None:
|
||||
fallback_dict_name = 'dict.' + fallback_tag
|
||||
print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
|
||||
shutil.copy(fallback_dict_name, dict_name)
|
||||
else:
|
||||
raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')
|
||||
else:
|
||||
@ -272,10 +277,11 @@ if __name__ == '__main__':
|
||||
print('Compress test.dat by all released zstd')
|
||||
print('-----------------------------------------------')
|
||||
|
||||
create_dict(head, dict_source_path)
|
||||
for tag in tags:
|
||||
print(tag)
|
||||
if tag >= 'v0.5.0':
|
||||
create_dict(tag, dict_source_path)
|
||||
create_dict(tag, dict_source_path, head)
|
||||
dict_compress_sample(tag, test_dat)
|
||||
remove_duplicates()
|
||||
decompress_dict(tag)
|
||||
|
Reference in New Issue
Block a user