Relocating everything in the repo so that it can be merged into
the columnstore repo.
147  storage-manager/tools/check_metafile_consistency.py  Normal file
@@ -0,0 +1,147 @@
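# Consistency checker for StorageManager's on-disk state: cross-checks the
# metadata files against the fake-cloud ("LocalStorage") directory, the cache,
# and the journal directory named in a storagemanager.cnf.
# Usage: python3 check_metafile_consistency.py <path to storagemanager.cnf>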
import argparse
import configparser
import json
import os
import re
import sys
import traceback
from pathlib import Path

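# Paths read from the config file; filled in by parseArgs().
# bigObjectSet accumulates every object key seen in any metadata file,
# for the orphan check in verifyNoOrphans().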
cloudPath = None
metaPath = None
journalPath = None
cachePath = None
bigObjectSet = set()

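# Values in storagemanager.cnf may embed environment variables as ${NAME};
# the two helpers below expand them before the values are used as paths.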
def get_envvar(match):
    return os.environ[match.group(1)]


def resolve_envvars(setting):
    # Raw string, and a character class instead of a greedy ".*", so that
    # multiple ${VAR} references on one line expand independently.
    pattern = r"\$\{([^}]*)\}"
    return re.sub(pattern, get_envvar, str(setting))

def parseArgs():
    global cloudPath
    global metaPath
    global journalPath
    global cachePath

    parser = argparse.ArgumentParser(
        description="Verifies that the fake-cloud and cache contain what the metadata files say")
    parser.add_argument("config_file", type=str, help="The storagemanager.cnf file")
    args = parser.parse_args()
    config = configparser.ConfigParser()
    try:
        config.read(args.config_file)
        cloudPath = Path(resolve_envvars(config["LocalStorage"]["path"]))
        metaPath = Path(resolve_envvars(config["ObjectStorage"]["metadata_path"]))
        cachePath = Path(resolve_envvars(config["Cache"]["path"]))
        journalPath = Path(resolve_envvars(config["ObjectStorage"]["journal_path"]))
        #print("{}\n{}\n{}\n{}".format(cloudPath, metaPath, cachePath, journalPath))
    except Exception as e:
        parser.error("Failed to parse the config file. Got '{}'".format(e))

    if not cloudPath.is_dir() or not metaPath.is_dir() \
            or not journalPath.is_dir() or not cachePath.is_dir():
        parser.error("cloudpath, metapath, journalpath, and cachepath need to be directories.")

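# Object keys appear to encode "<uuid>_<offset>_<length>_<source filename>";
# splitting on "_" at most three times yields those four fields.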
def key_breakout(key):
    return key.split("_", 3)

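# Checks a single .meta file: records each object key in bigObjectSet,
# compares the offset embedded in the key against the metadata, and verifies
# that the object exists in the cache and/or the cloud directory.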
def validateMetadata(metafile):
    try:
        with metafile.open() as f:
            metadata = json.load(f)

        for obj in metadata["objects"]:
            bigObjectSet.add(obj["key"])
            fields = key_breakout(obj["key"])
            cPath = cachePath / obj["key"]
            l_cloudPath = cloudPath / obj["key"]
            #if fields[2] != str(obj["length"]):
            #    print("object {}: in metadata length is {}, key says {}".format(obj["key"], obj["length"], fields[2]))
            # str() because JSON numbers load as ints, while the key fields are strings.
            if fields[1] != str(obj["offset"]):
                print("object {}: in metadata offset is {}, key says {}".format(obj["key"], obj["offset"], fields[1]))

            realSize = -1
            if cPath.exists():
                inCache = True
                realSize = cPath.stat().st_size
            else:
                inCache = False
            if l_cloudPath.exists():
                inCloud = True
                realSize = l_cloudPath.stat().st_size
            else:
                inCloud = False
            if not inCache and not inCloud:
                print("{} does not exist in cache or the cloud".format(obj["key"]))
                continue

            # There are a couple of cases where the length field and the actual file
            # size legitimately don't match:
            # 1) IOC::truncate() currently doesn't rename the object on truncate, for
            #    performance reasons.
            # 2) IOC::write() currently does the same when modifying an existing object.
            # In those cases the length could be validated by parsing the journal file as well.
            #if int(obj["length"]) != realSize:
            #    print("{} has the wrong length in its key. Actual length is {}.".format(obj["key"], realSize))

    except Exception as e:
        print("Failed to parse {}, got {}".format(metafile, e))
        traceback.print_exc()

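# Recursively walks the metadata directory and validates every .meta file found.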
def walkMetaDir(basepath):
    for p in basepath.iterdir():
        if p.is_dir():
            #print("Recursing on {}".format(p))
            walkMetaDir(p)
        elif p.is_file():
            if p.suffix == ".meta":
                validateMetadata(p)
            else:
                print("{} is not a metadata file".format(p))
        else:
            print("{} is neither a file nor a directory".format(p))

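# Journal files are assumed to be named "<object key>.journal", so p.stem
# recovers the key of the object each journal belongs to.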
# Verifies that everything in journalPath has a corresponding object in cloud/cache
def verifyValidJournalFiles():
    for p in journalPath.iterdir():
        l_cachePath = cachePath / p.stem
        l_cloudPath = cloudPath / p.stem
        if not l_cachePath.is_file() and not l_cloudPath.is_file():
            print("Journal file {} has no corresponding object in cache or cloud storage".format(p))

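# Relies on bigObjectSet having been filled in by walkMetaDir() /
# validateMetadata(), so this must run after the metadata walk.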
def verifyNoOrphans():
    for path in cloudPath.iterdir():
        if path.name not in bigObjectSet:
            print("{} is in cloud storage but not referenced by any metadata file".format(path.name))

    for path in cachePath.iterdir():
        if path.name not in bigObjectSet:
            print("{} is in the cache but not referenced by any metadata file".format(path.name))

def main():
    parseArgs()

    print("Verifying that all objects in metadata exist in cloud storage or the cache")
    walkMetaDir(metaPath)
    print("Verifying that all journal files have a corresponding object")
    verifyValidJournalFiles()
    print("Verifying that all objects in cloud & cache are referenced by metadata")
    verifyNoOrphans()
    print("Done")
    sys.exit(0)

if sys.version_info < (3, 5):
    print("Please use python version 3.5 or greater")
    sys.exit(1)

if __name__ == "__main__":
    main()