import json
import logging
import logging.config
import time
from datetime import datetime
from gzip import GzipFile
from tempfile import SpooledTemporaryFile

import features
from app import app, storage
from data.logs_model import logs_model
from data.userfiles import DelegateUserfiles
from util.locking import GlobalLock, LockNotAcquiredException
from util.log import logfile_path
from util.streamingjsonencoder import StreamingJSONEncoder
from util.timedeltastring import convert_to_timedelta
from workers.gunicorn_worker import GunicornWorker
from workers.worker import Worker

logger = logging.getLogger(__name__)


JSON_MIMETYPE = "application/json"
MIN_LOGS_PER_ROTATION = 5000
MEMORY_TEMPFILE_SIZE = 12 * 1024 * 1024  # 12 MiB in memory before spilling to disk

WORKER_FREQUENCY = app.config.get("ACTION_LOG_ROTATION_FREQUENCY", 60 * 60 * 12)
STALE_AFTER = convert_to_timedelta(app.config.get("ACTION_LOG_ROTATION_THRESHOLD", "30d"))
MINIMUM_LOGS_AGE_FOR_ARCHIVE = convert_to_timedelta(
    app.config.get("MINIMUM_LOGS_AGE_FOR_ARCHIVE", "7d")
)
SAVE_PATH = app.config.get("ACTION_LOG_ARCHIVE_PATH")
SAVE_LOCATION = app.config.get("ACTION_LOG_ARCHIVE_LOCATION")
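
# The config keys read above are this worker's knobs. A hypothetical config
# stanza (values are illustrative assumptions, apart from the defaults visible
# in the .get() calls above) might look like:
#
#   ACTION_LOG_ROTATION_FREQUENCY: 43200          # run every 12 hours
#   ACTION_LOG_ROTATION_THRESHOLD: "30d"          # rotate entries older than 30 days
#   MINIMUM_LOGS_AGE_FOR_ARCHIVE: "7d"            # never archive entries younger than this
#   ACTION_LOG_ARCHIVE_PATH: "rotatedactionlogs"  # hypothetical storage path prefix
#   ACTION_LOG_ARCHIVE_LOCATION: "default"        # hypothetical storage location name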


class LogRotateWorker(Worker):
    """
    Worker used to rotate old logs out of the database and into storage.
    """

    def __init__(self):
        super(LogRotateWorker, self).__init__()
        self.add_operation(self._archive_logs, WORKER_FREQUENCY)

    def _archive_logs(self):
        cutoff_date = datetime.now() - STALE_AFTER
        try:
            # Only one instance across the cluster should rotate at a time.
            with GlobalLock("ACTION_LOG_ROTATION"):
                self._perform_archiving(cutoff_date)
        except LockNotAcquiredException:
            # Another worker holds the lock; try again on the next run.
            return

    def _perform_archiving(self, cutoff_date):
        assert datetime.now() - cutoff_date >= MINIMUM_LOGS_AGE_FOR_ARCHIVE

        archived_files = []
        save_location = SAVE_LOCATION
        if not save_location:
            # Pick the *same* save location for all instances. This is a
            # fallback if a location was not configured.
            save_location = storage.locations[0]

        log_archive = DelegateUserfiles(app, storage, save_location, SAVE_PATH)

        for log_rotation_context in logs_model.yield_log_rotation_context(
            cutoff_date, MIN_LOGS_PER_ROTATION
        ):
            with log_rotation_context as context:
                for logs, filename in context.yield_logs_batch():
                    formatted_logs = [log_dict(log) for log in logs]
                    logger.debug("Archiving logs rotation %s", filename)
                    _write_logs(filename, formatted_logs, log_archive)
                    logger.debug("Finished archiving logs to %s", filename)
                    archived_files.append(filename)

        return archived_files


def log_dict(log):
    """
    Serializes a LogEntry into a JSON-compatible dict.
    """
    try:
        # The `metadata_json` text field is replaced by a `metadata` object
        # field when the logs model is backed by Elasticsearch.
        if hasattr(log, "metadata_json"):
            metadata_json = json.loads(str(log.metadata_json))
        elif hasattr(log, "metadata") and log.metadata:
            metadata_json = log.metadata.to_dict()
        else:
            metadata_json = {}
    except AttributeError:
        # Results returned by Elasticsearch do not have a top-level `id`
        # attribute like rows queried with Peewee; `random_id` is a copy of
        # the document's `_id`.
        logger.exception(
            "Could not get metadata for log entry %s",
            log.id if hasattr(log, "id") else log.random_id,
        )
        metadata_json = {}
    except (ValueError, TypeError):
        logger.exception(
            "Could not parse metadata JSON for log entry %s",
            log.id if hasattr(log, "id") else log.random_id,
        )
        metadata_json = {"__raw": log.metadata_json}

    return {
        "kind_id": log.kind_id,
        "account_id": log.account_id,
        "performer_id": log.performer_id,
        "repository_id": log.repository_id,
        "datetime": str(log.datetime),
        "ip": str(log.ip),
        "metadata_json": metadata_json,
    }
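
# For reference, a serialized entry looks roughly like this (values invented
# for illustration; `metadata_json` mirrors whatever the entry stored):
#
#   {
#       "kind_id": 1,
#       "account_id": 42,
#       "performer_id": 7,
#       "repository_id": 1001,
#       "datetime": "2020-01-01 00:00:00",
#       "ip": "10.0.0.1",
#       "metadata_json": {"tag": "latest"},
#   }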


def _write_logs(filename, logs, log_archive):
    # Stream the logs as JSON into a gzip member held in memory (spilling to
    # disk past MEMORY_TEMPFILE_SIZE), then upload the compressed archive.
    with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile:
        with GzipFile("temp_action_log_rotate", fileobj=tempfile, compresslevel=1) as zipstream:
            for chunk in StreamingJSONEncoder().iterencode(logs):
                zipstream.write(chunk.encode("utf-8"))

        tempfile.seek(0)
        log_archive.store_file(tempfile, JSON_MIMETYPE, content_encoding="gzip", file_id=filename)
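
# A minimal sketch of reading an archive back (assumes `stream` is the stored
# file opened for binary reading; not part of this worker):
#
#   with GzipFile(fileobj=stream) as f:
#       logs = json.loads(f.read().decode("utf-8"))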


def create_gunicorn_worker():
    """
    Follows the gunicorn application factory pattern, enabling a quay worker
    to run as a gunicorn worker thread.

    This is useful when utilizing gunicorn's hot reload in local dev.

    Utilizing this method will enforce a 1:1 quay worker to gunicorn worker ratio.
    """
    # Match the gating in main(): enabled only when the feature flag is on and
    # both the archive path and location are configured.
    feature_flag = features.ACTION_LOG_ROTATION and None not in [SAVE_PATH, SAVE_LOCATION]
    worker = GunicornWorker(__name__, app, LogRotateWorker(), feature_flag)
    return worker
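
# A usage sketch (assumed invocation, and assumes this module is importable as
# workers.logrotateworker; not shipped configuration): gunicorn can call the
# factory directly via its application-factory support:
#
#   gunicorn --workers 1 "workers.logrotateworker:create_gunicorn_worker()"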


def main():
    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

    if app.config.get("ACCOUNT_RECOVERY_MODE", False):
        logger.debug("Quay running in account recovery mode")
        # Sleep forever; no work is done in this mode.
        while True:
            time.sleep(100000)

    if not features.ACTION_LOG_ROTATION or None in [SAVE_PATH, SAVE_LOCATION]:
        logger.debug("Action log rotation worker not enabled; skipping")
        while True:
            time.sleep(100000)

    GlobalLock.configure(app.config)
    worker = LogRotateWorker()
    worker.start()


if __name__ == "__main__":
    main()