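"""Consistency checker for StorageManager's on-disk state.

Given a storagemanager.cnf, verifies that every object referenced by the
metadata files exists in the fake-cloud or the cache, that every journal
file has a corresponding object, and that no orphaned objects exist.
"""
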
import sys
import argparse
import json
from pathlib import Path
import os
import configparser
import re
import traceback

# Paths read from the config file by parseArgs().
cloudPath = None
metaPath = None
journalPath = None
cachePath = None

# Every object key referenced by any metadata file; filled in by validateMetadata().
bigObjectSet = set()

def get_envvar(match):
    # Look up the environment variable captured by the ${...} pattern.
    return os.environ[match.group(1)]

def resolve_envvars(setting):
    # Replace every ${VAR} in the setting with the value of the env var VAR.
    pattern = r"\$\{([^}]*)\}"
    return re.sub(pattern, get_envvar, str(setting))

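# For example, with HOME set in the environment, resolve_envvars("${HOME}/cache")
# would return something like "/home/user/cache" (path illustrative).
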
def parseArgs():
    global cloudPath
    global metaPath
    global journalPath
    global cachePath

    parser = argparse.ArgumentParser(
        description="Verifies that the fake-cloud and cache contain what the metadata files say")
    parser.add_argument("config_file", type=str, help="The storagemanager.cnf file")
    args = parser.parse_args()

    config = configparser.ConfigParser()
    try:
        config.read(args.config_file)
        cloudPath = Path(resolve_envvars(config["LocalStorage"]["path"]))
        metaPath = Path(resolve_envvars(config["ObjectStorage"]["metadata_path"]))
        cachePath = Path(resolve_envvars(config["Cache"]["path"]))
        journalPath = Path(resolve_envvars(config["ObjectStorage"]["journal_path"]))
    except Exception as e:
        parser.error("Failed to parse the config file. Got '{}'".format(e))

    if not cloudPath.is_dir() or not metaPath.is_dir() or not journalPath.is_dir() or not cachePath.is_dir():
        parser.error("cloudPath, metaPath, journalPath, and cachePath need to be directories.")

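# The config file must contain the sections and options read above; a minimal
# example might look like this (all paths illustrative):
#
#   [ObjectStorage]
#   metadata_path = ${HOME}/storagemanager/metadata
#   journal_path = ${HOME}/storagemanager/journal
#
#   [LocalStorage]
#   path = ${HOME}/storagemanager/fake-cloud
#
#   [Cache]
#   path = ${HOME}/storagemanager/cache
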
def key_breakout(key):
    # Split an object key into its underscore-separated fields.
    return key.split("_", 3)

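# Judging by how the fields are used below, a key looks something like
# "<uuid>_<offset>_<length>_<filename>" (format assumed from the checks here), e.g.
#   key_breakout("abcd-123_0_8192_~data1") -> ["abcd-123", "0", "8192", "~data1"]
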
def validateMetadata(metafile):
    try:
        with open(metafile) as f:
            metadata = json.load(f)

        for obj in metadata["objects"]:
            bigObjectSet.add(obj["key"])
            fields = key_breakout(obj["key"])
            cPath = cachePath / obj["key"]
            l_cloudPath = cloudPath / obj["key"]
            #if fields[2] != obj["length"]:
            #    print("object {}: in metadata length is {}, key says {}".format(obj["key"], obj["length"], fields[2]))
            if fields[1] != obj["offset"]:
                print("object {}: in metadata offset is {}, key says {}".format(obj["key"], obj["offset"], fields[1]))

            realSize = -1
            if cPath.exists():
                inCache = True
                realSize = cPath.stat().st_size
            else:
                inCache = False
            if l_cloudPath.exists():
                inCloud = True
                realSize = l_cloudPath.stat().st_size
            else:
                inCloud = False
            if not inCache and not inCloud:
                print("{} does not exist in cache or the cloud".format(obj["key"]))
                continue

            # There are a couple of cases where the length field and the actual file
            # size legitimately don't match:
            # 1) IOC::truncate() currently doesn't rename the object on truncate, for
            #    performance reasons.
            # 2) IOC::write() currently does the same when modifying an existing object.
            # In those cases, the length could be validated by parsing the journal file as well.
            #if int(obj["length"]) != realSize:
            #    print("{} has the wrong length in its key. Actual length is {}.".format(obj["key"], realSize))

    except Exception as e:
        print("Failed to parse {}, got {}".format(metafile, e))
        traceback.print_exc()

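# Each .meta file is JSON containing (at least) an "objects" array whose entries
# carry the "key", "offset", and "length" fields used above, roughly:
#
#   {"objects": [{"offset": 0, "length": 8192, "key": "abcd-123_0_8192_~data1"}]}
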
def walkMetaDir(basepath):
    for p in basepath.iterdir():
        if p.is_dir():
            walkMetaDir(p)
        elif p.is_file():
            if p.suffix == ".meta":
                validateMetadata(p)
            else:
                print("{} is not a metadata file".format(p))
        else:
            print("{} is not a regular file or directory".format(p))

# Verifies that everything in journalPath has a corresponding object in cloud/cache.
def verifyValidJournalFiles():
    for p in journalPath.iterdir():
        # p.stem drops the journal file's suffix, which should leave the object key.
        l_cachePath = cachePath / p.stem
        l_cloudPath = cloudPath / p.stem
        if not l_cachePath.is_file() and not l_cloudPath.is_file():
            print("Journal file {} has no corresponding object in cache or cloud storage".format(p))

def verifyNoOrphans():
    for path in cloudPath.iterdir():
        if path.name not in bigObjectSet:
            print("{} is in cloud storage but not referenced by any metadata file".format(path.name))

    for path in cachePath.iterdir():
        if path.name not in bigObjectSet:
            print("{} is in the cache but not referenced by any metadata file".format(path.name))

def main():
    parseArgs()

    print("Verifying that all objects in metadata exist in cloud storage or the cache")
    walkMetaDir(metaPath)
    print("Verifying that all journal files have a corresponding object")
    verifyValidJournalFiles()
    print("Verifying that all objects in cloud & cache are referenced by metadata")
    verifyNoOrphans()
    print("Done")
    sys.exit(0)

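# Typical invocation (script name and config path illustrative):
#   python3 check_consistency.py /etc/columnstore/storagemanager.cnf
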
if sys.version_info < (3, 5):
    print("Please use Python version 3.5 or greater")
    sys.exit(1)

if __name__ == "__main__":
    main()