import logging
import logging.config
import signal
import socket
import sys
import time
from datetime import datetime, timedelta
from functools import wraps
from random import randint
from threading import Event

import sentry_sdk
from apscheduler.schedulers.background import BackgroundScheduler
from sentry_sdk.integrations.flask import FlaskIntegration
from sentry_sdk.integrations.logging import LoggingIntegration
from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
from sentry_sdk.integrations.stdlib import StdlibIntegration

import features
from app import app
from data.database import UseThenDisconnect
from util.log import logfile_path

logger = logging.getLogger(__name__)


def with_exponential_backoff(backoff_multiplier=10, max_backoff=3600, max_retries=10):
    def inner(func):
        """
        Decorator that retries the operation with exponential backoff if it raises an exception.

        Waits 2^attempts * `backoff_multiplier` seconds between attempts, capped at `max_backoff`,
        for at most `max_retries` attempts, then re-raises the exception.
        """

        def wrapper(*args, **kwargs):
            attempts = 0
            backoff = 0

            while True:
                next_backoff = 2**attempts * backoff_multiplier
                backoff = min(next_backoff, max_backoff)
                attempts += 1

                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    if max_retries is not None and attempts == max_retries:
                        raise e

                logger.exception("Operation raised exception, retrying in %d seconds", backoff)
                time.sleep(backoff)

        return wrapper

    return inner
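
# Illustrative usage (a sketch, not part of this module): `refresh_token` is a
# hypothetical operation. With these arguments the waits grow as 5s, 10s, 20s,
# ..., capped at 300 seconds, for at most 5 attempts before re-raising:
#
#     @with_exponential_backoff(backoff_multiplier=5, max_backoff=300, max_retries=5)
#     def refresh_token():
#         ...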


class Worker(object):
    """
    Base class for workers which perform some work periodically.
    """

    def __init__(self):
        self._sched = BackgroundScheduler()
        self._operations = []
        self._stop = Event()
        self._terminated = Event()

        worker_name = "%s:worker-%s" % (socket.gethostname(), self.__class__.__name__)

        if app.config.get("EXCEPTION_LOG_TYPE", "FakeSentry") == "Sentry":
            sentry_dsn = app.config.get("SENTRY_DSN", "")
            if sentry_dsn:
                try:
                    integrations = []

                    # Always include the logging integration.
                    integrations.append(
                        LoggingIntegration(level=logging.INFO, event_level=logging.ERROR)
                    )

                    # Only add the Flask and SQLAlchemy integrations if OpenTelemetry
                    # is not enabled.
                    if not getattr(features, "OTEL_TRACING", False):
                        integrations.extend(
                            [
                                FlaskIntegration(transaction_style="endpoint"),
                                SqlalchemyIntegration(),
                                StdlibIntegration(),
                            ]
                        )
                    else:
                        logger.info(
                            "OpenTelemetry enabled - using minimal Sentry integrations for worker"
                        )

                    sentry_sdk.init(
                        dsn=sentry_dsn,
                        environment=app.config.get("SENTRY_ENVIRONMENT", "production"),
                        traces_sample_rate=app.config.get("SENTRY_TRACES_SAMPLE_RATE", 0.1),
                        profiles_sample_rate=app.config.get("SENTRY_PROFILES_SAMPLE_RATE", 0.1),
                        integrations=integrations,
                        default_integrations=False,
                        auto_session_tracking=True,
                    )
                    sentry_sdk.set_tag("worker", worker_name)
                except Exception as e:
                    logger.warning("Failed to initialize Sentry: %s", str(e))
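
    # Configuration keys consulted above, with illustrative values (the defaults
    # shown in the .get() calls apply when a key is absent; "FakeSentry" leaves
    # Sentry uninitialized):
    #
    #     EXCEPTION_LOG_TYPE: Sentry
    #     SENTRY_DSN: https://<key>@<host>/<project>
    #     SENTRY_ENVIRONMENT: production
    #     SENTRY_TRACES_SAMPLE_RATE: 0.1
    #     SENTRY_PROFILES_SAMPLE_RATE: 0.1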

    def is_healthy(self):
        return not self._stop.is_set()

    def is_terminated(self):
        return self._terminated.is_set()

    def ungracefully_terminated(self):
        """
        Method called when the worker has been terminated in an ungraceful fashion.
        """
        pass

    def add_operation(self, operation_func, operation_sec):
        @wraps(operation_func)
        def _operation_func():
            try:
                with UseThenDisconnect(app.config):
                    return operation_func()
            except Exception:
                logger.exception("Operation raised exception")
                # The Sentry SDK automatically captures exceptions when configured.
                sentry_sdk.capture_exception()

        self._operations.append((_operation_func, operation_sec))
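
    # Illustrative subclass (a sketch; `ExampleCleanupWorker` and its operation
    # are hypothetical, shown only to demonstrate `add_operation`):
    #
    #     class ExampleCleanupWorker(Worker):
    #         def __init__(self):
    #             super(ExampleCleanupWorker, self).__init__()
    #             self.add_operation(self._cleanup, 60)  # run every 60 seconds
    #
    #         def _cleanup(self):
    #             ...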

    def _setup_and_wait_for_shutdown(self):
        signal.signal(signal.SIGTERM, self.terminate)
        signal.signal(signal.SIGINT, self.terminate)

        while not self._stop.wait(1):
            pass

    def start(self):
        logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

        if not app.config.get("SETUP_COMPLETE", False):
            logger.info("Product setup is not yet complete; skipping worker startup")
            self._setup_and_wait_for_shutdown()
            return

        if app.config.get("REGISTRY_STATE", "normal") == "readonly":
            logger.info("Product is in read-only mode; skipping worker startup")
            self._setup_and_wait_for_shutdown()
            return

        logger.debug("Scheduling worker.")

        self._sched.start()
        for operation_func, operation_sec in self._operations:
            start_date = datetime.now() + timedelta(seconds=0.001)
            if app.config.get("STAGGER_WORKERS"):
                start_date += timedelta(seconds=randint(1, operation_sec))
            logger.debug("First run scheduled for %s", start_date)
            self._sched.add_job(
                operation_func,
                "interval",
                seconds=operation_sec,
                start_date=start_date,
                max_instances=1,
            )

        self._setup_and_wait_for_shutdown()

        logger.debug("Waiting for running tasks to complete.")
        self._sched.shutdown()
        logger.debug("Finished.")

        self._terminated.set()

    def terminate(self, signal_num=None, stack_frame=None, graceful=False):
        if self._terminated.is_set():
            sys.exit(1)
        else:
            logger.debug("Shutting down worker.")
            self._stop.set()

        if not graceful:
            self.ungracefully_terminated()

    def join(self):
        self.terminate(graceful=True)
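

# Typical worker entry point (a sketch; `ExampleCleanupWorker` refers to the
# hypothetical subclass illustrated above):
#
#     if __name__ == "__main__":
#         worker = ExampleCleanupWorker()
#         worker.start()  # blocks, handling SIGTERM/SIGINT, until shutdown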