1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00
mariadb-columnstore-engine/storage-manager/tools/check_metafile_consistency.py
2019-08-14 11:07:24 -05:00

148 lines
5.1 KiB
Python

import io
import sys
import argparse
import json
from pathlib import Path
import os
import configparser
import re
import traceback
# Module-level paths, populated by parseArgs() from storagemanager.cnf.
cloudPath = None    # [LocalStorage] path: fake-cloud object directory
metaPath = None     # [ObjectStorage] metadata_path: tree of *.meta files
journalPath = None  # [ObjectStorage] journal_path: journal file directory
cachePath = None    # [Cache] path: cached object directory
# Every object key seen in any metadata file; consulted by verifyNoOrphans().
bigObjectSet = set()
def get_envvar(match):
    """Return the value of the environment variable named by capture group 1.

    Used as the replacement callable for re.sub(); raises KeyError if the
    variable is not set.
    """
    var_name = match.group(1)
    return os.environ[var_name]
def resolve_envvars(setting):
    """Expand every ``${VAR}`` reference in *setting* from the environment.

    Returns the expanded string. Raises KeyError if a referenced variable
    is not set in os.environ.
    """
    # Raw string: "\$" is an invalid escape in a plain string literal and
    # warns on modern Pythons. Non-greedy (.*?) so that a setting such as
    # "${A}/x/${B}" resolves each variable separately instead of treating
    # "A}/x/${B" as a single variable name (the original greedy pattern
    # swallowed everything up to the last closing brace).
    pattern = r"\$\{(.*?)\}"
    # Coerce to str first (the original computed str(setting) but never
    # used it); inline the lookup so this function is self-contained.
    return re.sub(pattern, lambda m: os.environ[m.group(1)], str(setting))
def parseArgs():
    """Parse command-line arguments, read storagemanager.cnf, and populate
    the module-level path globals (cloudPath, metaPath, journalPath,
    cachePath).

    Exits via argparse's error handling (status 2) if the config file
    cannot be parsed or any configured path is not an existing directory.
    """
    global cloudPath
    global metaPath
    global journalPath
    global cachePath

    parser = argparse.ArgumentParser(
        description="Verifies that the fake-cloud and cache contain what the metadata files say")
    parser.add_argument("config_file", type=str, help="The storagemanager.cnf file")
    args = parser.parse_args()

    config = configparser.ConfigParser()
    try:
        config.read(args.config_file)
        cloudPath = Path(resolve_envvars(config["LocalStorage"]["path"]))
        metaPath = Path(resolve_envvars(config["ObjectStorage"]["metadata_path"]))
        cachePath = Path(resolve_envvars(config["Cache"]["path"]))
        journalPath = Path(resolve_envvars(config["ObjectStorage"]["journal_path"]))
    except Exception as e:
        # Covers missing file/section/key and bad env-var references alike;
        # parser.error() prints the message and exits.
        parser.error("Failed to parse the config file. Got '{}'".format(e))

    # These are already Path objects; no need to re-wrap them as the
    # original did.
    if (not cloudPath.is_dir() or not metaPath.is_dir()
            or not journalPath.is_dir() or not cachePath.is_dir()):
        # Message fixed to mention cachepath, which is checked here too.
        parser.error("cloudpath, metapath, journalpath, and cachepath need to be directories.")
def key_breakout(key):
    """Split an object key into its underscore-delimited fields.

    At most three splits are performed, yielding up to four parts; any
    underscores in the trailing part are preserved.
    """
    parts = key.split("_", 3)
    return parts
def validateMetadata(metafile):
    """Validate one .meta file.

    Records each object's key in the module-level bigObjectSet, checks that
    the key's embedded offset matches the metadata, and verifies the object
    exists in either the cache or the cloud directory. Inconsistencies are
    reported via print(); any parse/IO failure is caught and reported rather
    than propagated.
    """
    try:
        # Context manager closes the file promptly; the original leaked the
        # handle until garbage collection.
        with open(metafile) as f:
            metadata = json.load(f)
        for obj in metadata["objects"]:
            bigObjectSet.add(obj["key"])
            fields = key_breakout(obj["key"])
            cPath = cachePath / obj["key"]
            l_cloudPath = cloudPath / obj["key"]
            #if fields[2] != obj["length"]:
            #    print("object {}: in metadata length is {}, key says {}".format(obj["key"], obj["length"], fields[2]))
            # NOTE(review): fields[1] is always a str, while "offset" may be
            # stored as an int in the JSON; if so this comparison fires on
            # every object — confirm the metadata schema.
            if fields[1] != obj["offset"]:
                print("object {}: in metadata offset is {}, key says {}".format(obj["key"], obj["offset"], fields[1]))
            realSize = -1
            if cPath.exists():
                inCache = True
                realSize = cPath.stat().st_size
            else:
                inCache = False
            if l_cloudPath.exists():
                inCloud = True
                realSize = l_cloudPath.stat().st_size
            else:
                inCloud = False
            if not inCache and not inCloud:
                print("{} does not exist in cache or the cloud".format(obj["key"]))
                continue
            # There are a couple cases where the length field and actual file
            # size legitimately don't match:
            # 1) IOC::truncate() currently doesn't rename the object on
            #    truncate for performance reasons.
            # 2) IOC::write() currently does the same on modifying an
            #    existing object.
            # In that case, we can validate the length by parsing the journal
            # file as well.
            #if int(obj["length"]) != realSize:
            #    print("{} has the wrong length in its key. Actual length is {}.".format(obj["key"], realSize))
    except Exception as e:
        print("Failed to parse {}, got {}".format(metafile, e))
        traceback.print_exc()
def walkMetaDir(basepath):
    """Recursively scan *basepath*, validating every *.meta file found and
    reporting any other directory entry."""
    for entry in basepath.iterdir():
        if entry.is_dir():
            walkMetaDir(entry)
            continue
        if entry.is_file() and entry.suffix == ".meta":
            validateMetadata(entry)
        else:
            # Regular files without a .meta suffix and anything that is
            # neither a file nor a directory get the same report.
            print("{} is not a metadata file".format(entry))
# Verifies that everything in journalPath has a corresponding object in cloud/cache
def verifyValidJournalFiles():
    """Report every journal file whose base object (journal stem) is missing
    from both the cache and the cloud directory."""
    for journal in journalPath.iterdir():
        stem = journal.stem
        in_cache = (cachePath / stem).is_file()
        in_cloud = (cloudPath / stem).is_file()
        if not (in_cache or in_cloud):
            print("Journal file {} has no corresponding object in cache or cloud storage".format(journal))
def verifyNoOrphans():
    """Report objects present in cloud storage or the cache whose keys were
    never seen in any metadata file (i.e. not added to bigObjectSet)."""
    for entry in cloudPath.iterdir():
        if entry.name not in bigObjectSet:
            print("{} is in cloud storage but not referenced by any metadata file".format(entry.name))
    for entry in cachePath.iterdir():
        if entry.name not in bigObjectSet:
            print("{} is in the cache but not referenced by any metadata file".format(entry.name))
def main():
    """Run every consistency check in sequence and exit with status 0.

    parseArgs() must run first: it populates the module-level path globals
    the later checks read. walkMetaDir() must run before verifyNoOrphans(),
    since it fills bigObjectSet as a side effect.
    """
    parseArgs()
    print("Verifying that all objects in metadata exist in cloud storage or the cache")
    walkMetaDir(metaPath)
    print("Verifying that all journal files have a corresponding object")
    verifyValidJournalFiles()
    print("Verifying that all objects in cloud & cache are referenced by metadata")
    verifyNoOrphans()
    print("Done")
    sys.exit(0)
# Enforce the minimum supported interpreter before doing anything else.
# NOTE: this runs at import time, even when the module is imported rather
# than executed directly.
if sys.version_info < (3, 5):
    print("Please use python version 3.5 or greater")
    sys.exit(1)

if __name__ == "__main__":
    main()