You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-30 07:25:34 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			148 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			148 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import io
 | |
| import sys
 | |
| import argparse
 | |
| import json
 | |
| from pathlib import Path
 | |
| import os
 | |
| import configparser
 | |
| import re
 | |
| import traceback
 | |
| 
 | |
| 
 | |
# Directories taken from the config file; all four are assigned by parseArgs().
cloudPath = metaPath = journalPath = cachePath = None
# Every object key seen in a metadata file. validateMetadata() adds to it and
# verifyNoOrphans() checks directory entries against it.
bigObjectSet = set()
| 
 | |
def get_envvar(match):
    """Return the value of the environment variable named by group 1 of *match*.

    Serves as the replacement callback for re.sub() in resolve_envvars().

    Raises:
        KeyError: if the variable is not set in os.environ.
    """
    return os.environ[match.group(1)]


def resolve_envvars(setting):
    """Return *setting* as a string with each ${NAME} replaced from os.environ.

    Args:
        setting: the raw config value; it is converted with str() first.

    Returns:
        The text with every ${NAME} occurrence substituted.  Text without
        any ${...} is returned unchanged.

    Raises:
        KeyError: if a referenced environment variable is not set.
    """
    # [^}]* instead of .* so that each ${...} is matched on its own; a greedy
    # .* would treat "${A}/x/${B}" as a single variable named "A}/x/${B".
    pattern = r"\$\{([^}]*)\}"
    return re.sub(pattern, get_envvar, str(setting))
| 
 | |
def parseArgs():
    """Parse the command line and load the storage directories from the config file.

    Reads the single positional argument (the config file) and assigns the
    module globals cloudPath, metaPath, cachePath and journalPath from the
    [LocalStorage] path, [ObjectStorage] metadata_path, [Cache] path and
    [ObjectStorage] journal_path settings, after passing each value through
    resolve_envvars().

    Does not return normally on failure: parser.error() prints the message and
    exits when the config file cannot be read, a setting is missing, or any of
    the four paths is not a directory.
    """
    global cloudPath
    global metaPath
    global journalPath
    global cachePath

    parser = argparse.ArgumentParser(description="Verifies that the fake-cloud and cache contain what the metadata files say")
    parser.add_argument("config_file", type=str, help="The storagemanager.cnf file")
    args = parser.parse_args()
    config = configparser.ConfigParser()
    try:
        # ConfigParser.read() silently skips files it cannot open and returns
        # the list of files it did load, so an empty result means the config
        # file itself was unreadable.
        if not config.read(args.config_file):
            raise OSError("{} could not be read".format(args.config_file))
        cloudPath = Path(resolve_envvars(config["LocalStorage"]["path"]))
        metaPath = Path(resolve_envvars(config["ObjectStorage"]["metadata_path"]))
        cachePath = Path(resolve_envvars(config["Cache"]["path"]))
        journalPath = Path(resolve_envvars(config["ObjectStorage"]["journal_path"]))
    except Exception as e:
        parser.error("Failed to parse the config file.  Got '{}'".format(e))

    # All four locations are required; name the ones that failed the check.
    locations = (
        ("cloudpath", cloudPath),
        ("metapath", metaPath),
        ("journalpath", journalPath),
        ("cachepath", cachePath),
    )
    notDirs = ["{} ({})".format(label, path) for label, path in locations if not path.is_dir()]
    if notDirs:
        parser.error("cloudpath, metapath, journalpath, and cachepath need to be directories.  Not a directory: {}".format(", ".join(notDirs)))
| 
 | |
def key_breakout(key):
    """Split an object key on its first three underscores.

    Returns a list of at most four strings; any underscores after the third
    one stay inside the last element.
    """
    return key.split(sep="_", maxsplit=3)
| 
 | |
def validateMetadata(metafile):
    """Check the objects listed in one metadata file against cache and cloud.

    For every entry of the file's "objects" list this:
      * records the entry's key in bigObjectSet,
      * reports a mismatch between the entry's "offset" and the offset
        encoded as the second underscore-separated field of the key,
      * reports a key that exists in neither cachePath nor cloudPath.

    Findings are printed, not raised.  Any exception while reading or
    checking the file is printed with a traceback and ends the check of this
    file only.

    Args:
        metafile: path of the JSON metadata file to check.
    """
    try:
        # Context manager so the handle is closed even if the JSON is invalid.
        with open(metafile) as f:
            metadata = json.load(f)

        for obj in metadata["objects"]:
            key = obj["key"]
            bigObjectSet.add(key)
            fields = key_breakout(key)

            if len(fields) < 2:
                # No offset field to compare; report it but still run the
                # existence check below instead of abandoning the whole file.
                print("object {}: key has no offset field".format(key))
            # str() so that an offset stored as a JSON number still compares
            # equal to the text taken from the key.
            elif fields[1] != str(obj["offset"]):
                print("object {}: in metadata offset is {}, key says {}".format(key, obj["offset"], fields[1]))

            if not (cachePath / key).exists() and not (cloudPath / key).exists():
                print("{} does not exist in cache or the cloud".format(key))

            # The length in the key is deliberately not compared with the
            # real file size, because the two can legitimately differ:
            # 1) IOC::truncate() currently doesn't rename the object on
            #    truncate, for performance reasons.
            # 2) IOC::write() currently does the same on modifying an
            #    existing object.
            # Validating the length would require parsing the journal file
            # as well.

    except Exception as e:
        print("Failed to parse {}, got {}".format(metafile, e))
        traceback.print_exc()
| 
 | |
| 
 | |
def walkMetaDir(basepath):
    """Walk *basepath* recursively and validate every ".meta" file in it.

    Directories are descended into.  Regular files whose suffix is ".meta"
    are handed to validateMetadata().  Every other entry is reported on
    stdout as not being a metadata file.
    """
    for entry in basepath.iterdir():
        if entry.is_dir():
            walkMetaDir(entry)
            continue
        if entry.is_file() and entry.suffix == ".meta":
            validateMetadata(entry)
            continue
        # Non-.meta files and anything that is neither a file nor a directory.
        print("{} is not a metadata file".format(entry))
| 
 | |
# Verifies that everything in journalPath has a corresponding object in cloud/cache
def verifyValidJournalFiles():
    """Report journal entries whose object is in neither the cache nor the cloud.

    The object name is the journal entry's file name with its final suffix
    removed; it must exist as a regular file under cachePath or cloudPath.
    """
    for journal in journalPath.iterdir():
        objectName = journal.stem
        if (cachePath / objectName).is_file() or (cloudPath / objectName).is_file():
            continue
        print("Journal file {} has no corresponding object in cache or cloud storage".format(journal))
| 
 | |
def verifyNoOrphans():
    """Report entries in cloudPath and cachePath that no metadata file references.

    An entry counts as referenced when its name is in bigObjectSet.  The
    cloud directory is listed first, then the cache directory.
    """
    checks = (
        (cloudPath, "{} is in cloud storage but not referenced by any metadata file"),
        (cachePath, "{} is in the cache but not referenced by any metadata file"),
    )
    for directory, message in checks:
        for entry in directory.iterdir():
            if entry.name in bigObjectSet:
                continue
            print(message.format(entry.name))
| 
 | |
def main():
    """Run the three verification passes in order, then exit with status 0.

    parseArgs() is called first to populate the directory globals.  Each pass
    is announced on stdout before it runs.
    """
    parseArgs()

    # metaPath is only set by parseArgs(), so it is looked up when the first
    # stage actually runs.
    stages = (
        ("Verifying that all objects in metadata exist in cloud storage or the cache",
         lambda: walkMetaDir(metaPath)),
        ("Verifying that all journal files have a corresponding object",
         verifyValidJournalFiles),
        ("Verifying that all objects in cloud & cache are referenced by metadata",
         verifyNoOrphans),
    )
    for banner, stage in stages:
        print(banner)
        stage()

    print("Done")
    sys.exit(0)
| 
 | |
| 
 | |
# Refuse to continue on interpreters older than 3.5.  This runs whenever the
# module is loaded, before the entry-point guard below.
if not sys.version_info >= (3, 5):
    print("Please use python version 3.5 or greater")
    sys.exit(1)

# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| 
 |