1
0
mirror of https://github.com/quay/quay.git synced 2026-01-26 06:21:37 +03:00
Files
quay/util/saas/exceptionlog.py
Ryan Wallace 40d9458053 fix(sentry): filter out repo metadata when sending events (PROJQUAY-9436) (#4723)
fix(sentry): filter out repo metadata when sending events
2025-12-11 15:54:33 -05:00

374 lines
12 KiB
Python

import logging
import re
from typing import Any, Optional
import sentry_sdk
from sentry_sdk.integrations.flask import FlaskIntegration
from sentry_sdk.integrations.logging import LoggingIntegration
from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
from sentry_sdk.integrations.stdlib import StdlibIntegration
# Note: project-local imports are deferred where possible to avoid circular imports
# (see the late module-level `import features` further down in this file).
# Module-level logger; used below to report Sentry initialization progress and failures.
logger = logging.getLogger(__name__)
# Patterns to NEVER exclude: when any of these substrings occurs in an event's
# searchable text, `_sentry_before_send_ignore_known` keeps the event before any
# exclusion pattern is consulted. Matching is plain substring containment against
# lowercased text, so every entry must be lowercase.
IMPORTANT_PATTERNS = [
    "database",
    "postgresql",
    "mysql",
    "redis",
    "ldap",
]
# Lowercase substrings that cause an event to be dropped when found in any of the
# event's searchable text (unless an IMPORTANT_PATTERNS entry matched first).
# Matching is plain substring containment, so short entries such as "gone" or
# "not found" also match inside longer messages.
EXCLUDE_PATTERNS = [
    # networking
    "err_network",
    "err_canceled",
    "econnaborted",
    "etimedout",
    "err_fr_too_many_redirects",
    "network error",
    "connection aborted",
    "connection timeout",
    "request timeout",
    "fetch failed",
    # csrf, session and authentication
    "csrf",
    "invalid token",
    "token mismatch",
    "forbidden (csrf token missing)",
    "session expired",
    "authentication required",
    # client errors (HTTP 4xx reason phrases)
    "unauthorized",
    "forbidden",
    "not found",
    "bad request",
    "method not allowed",
    "not acceptable",
    "conflict",
    "gone",
    "precondition failed",
    "request entity too large",
    "request uri too long",
    "unsupported media type",
    "requested range not satisfiable",
    "expectation failed",
    # noisy infrastructure (security scanner, connection refusals, OTEL)
    "security scanner endpoint",
    "localhost:6000",
    "clair",
    "vulnerability scanner",
    "indexer api",
    "connection error when trying to connect",
    "errno 111",
    "connectionrefusederror",
    "service unavailable",
    "endpoint not available",
    "security scanner",
    "[otel]",
    "otel request",
]
# Case-insensitive matcher for an HTTP 4xx status code that appears in context:
#   - the word "error", "status" or "http", whitespace, then exactly three digits
#     starting with 4 (e.g. "Error 400", "Status 401", "HTTP 403"), or
#   - whitespace, three digits starting with 4, then a colon (e.g. " 404:").
# Longer numbers such as "error: 400473" or "port 4001" do not match.
HTTP_4XX_PATTERN = re.compile(
    r"(?:\b(?:error|status|http)\s+4\d{2}\b)|(?:\s4\d{2}:)",
    flags=re.IGNORECASE,
)
# Matcher for a trailing "; Arguments: {...}" suffix on an error message, e.g.
#   "; Arguments: {'repository': 'bitnami/redis-exporter', ...}"
# The braces must run to the end of the text; DOTALL lets the payload span lines.
# Used to remove that suffix before pattern matching.
ARGUMENTS_PATTERN = re.compile(
    r";\s*Arguments:\s*\{.*\}$",
    flags=re.IGNORECASE | re.DOTALL,
)
# Lowercase substrings identifying common browser/JavaScript errors. They are only
# consulted for events whose `platform` is "javascript" or "browser"; a match
# drops the event. Matching is plain substring containment.
BROWSER_ERROR_PATTERNS = [
    "script error",
    "syntax error",
    "reference error",
    "type error",
    "cannot read property",
    "undefined is not a function",
    "network request failed",
    "failed to fetch",
    "load failed",
    "cors error",
    "cross-origin",
    "blocked by client",
]
def _extract_searchable_text(ex_event: Any) -> set[str]:
    """
    Extract all searchable text from a Sentry error event.

    Sentry error events can be created in different ways:
    - Via exceptions: have 'exception.values' field
    - Via logger.error(): have 'logentry.formatted' field

    The extraction is tolerant of malformed events: fields that are missing,
    None, or not of the expected container type are skipped instead of raising,
    so one odd field cannot disable filtering on the remaining fields.

    Args:
        ex_event: Sentry event dictionary

    Returns:
        Set of lowercase strings containing exception values, log messages,
        and other searchable event fields, with any trailing
        "; Arguments: {...}" suffix removed. Empty strings and None values
        are filtered out.
    """
    if not isinstance(ex_event, dict):
        return set()

    collected: set[str] = set()

    def _collect(source: Any, *keys: str) -> None:
        # Only dicts carry named fields; None or other payload types are skipped.
        if not isinstance(source, dict):
            return
        for key in keys:
            value = source.get(key)
            # Skip None so it is not stringified into the literal text "none".
            if value is not None:
                collected.add(str(value).lower())

    # Exception field (errors raised as exceptions). Normally an object with a
    # "values" list; a flat list of exception objects is accepted as well.
    exception = ex_event.get("exception")
    if isinstance(exception, dict):
        exception = exception.get("values")
    if isinstance(exception, (list, tuple)):
        for exc in exception:
            _collect(exc, "value", "type")

    # Logentry field (errors from logger.error() calls)
    _collect(ex_event.get("logentry"), "formatted", "message")

    # Top-level message/title, plus logger name and culprit
    # (function/module that caused the error)
    _collect(ex_event, "message", "title", "logger", "culprit")

    # Metadata (may contain title or other descriptive text)
    _collect(ex_event.get("metadata"), "title", "value")

    # Strip Arguments metadata to prevent false positives from repository/image names
    stripped = {ARGUMENTS_PATTERN.sub("", text) for text in collected}

    # Remove empty strings
    stripped.discard("")
    return stripped
def _has_important_patterns(texts: set[str], important_patterns: list[str]) -> bool:
"""
Check if any text contains important patterns that should never be filtered.
Args:
texts: Set of text strings to search
important_patterns: Patterns that indicate event should be kept
Returns:
True if important patterns found (keep event), False otherwise
"""
for text in texts:
if any(pattern in text for pattern in important_patterns):
return True
return False
def _should_drop_by_patterns(texts: set[str], filter_patterns: list[str]) -> bool:
"""
Check if any text contains filter patterns.
Args:
texts: Set of text strings to search
filter_patterns: Patterns that indicate event should be filtered
Returns:
True if filter patterns found (drop event), False otherwise
"""
for text in texts:
if any(pattern in text for pattern in filter_patterns):
return True
return False
def _matches_http_4xx_pattern(texts: set[str]) -> bool:
    """
    Report whether any text contains an HTTP 4xx status code in context.

    Each text is searched with the module-level HTTP_4XX_PATTERN regex.

    Args:
        texts: Set of text strings to search

    Returns:
        True if the pattern is found in at least one text (drop event),
        False otherwise
    """
    return any(HTTP_4XX_PATTERN.search(candidate) is not None for candidate in texts)
def _has_4xx_status_tag(tags: Any) -> bool:
    """Return True when the event tags carry a numeric ``status_code`` in the 400-499 range."""
    if isinstance(tags, dict):
        status_code = tags.get("status_code")
    elif isinstance(tags, (list, tuple)):
        # Tags may also arrive as a list of [key, value] pairs.
        status_code = next(
            (
                pair[1]
                for pair in tags
                if isinstance(pair, (list, tuple))
                and len(pair) == 2
                and pair[0] == "status_code"
            ),
            None,
        )
    else:
        # Missing/None/unknown tag container: nothing to inspect.
        return False
    if status_code is None:
        return False
    try:
        return 400 <= int(status_code) < 500
    except (TypeError, ValueError):
        # Non-numeric status codes are ignored by this filter
        return False


def _sentry_before_send_ignore_known(ex_event: Any, hint: Any) -> Optional[Any]:
    """
    Drop error events for expected client-side errors that we don't want in Sentry.

    Filters errors regardless of how they were reported (via exceptions or logger calls).
    Specifically ignore:
    - Auth token errors (InvalidBearerTokenException, InvalidJWTException)
    - HTTP 4xx client errors (400, 401, 403, 404, etc.)
    - Network errors (ERR_NETWORK, ECONNABORTED, ETIMEDOUT, ERR_CANCELED)
    - Browser/JavaScript errors from frontend
    - CSRF token related errors
    - Session expiration errors

    Events whose text contains an IMPORTANT_PATTERNS entry are always kept
    (checked after the exception-class check, before every other filter).

    Args:
        ex_event: Sentry event dictionary passed to ``before_send``
        hint: Sentry hint; when it is a dict with an ``exc_info`` entry, the
            exception class name is inspected

    Returns:
        None to drop the event, otherwise ``ex_event`` unchanged. Any error
        raised while filtering results in the event being kept.
    """
    if not ex_event:
        return ex_event

    try:
        # Check exception info from hint
        exc_info = hint.get("exc_info") if isinstance(hint, dict) else None
        if exc_info is not None and len(exc_info) >= 1:
            # Compare by class name so the auth exception classes do not have
            # to be imported here (avoids import-time cycles).
            name = getattr(exc_info[0], "__name__", "")
            if name in {"InvalidBearerTokenException", "InvalidJWTException"}:
                return None

        # Extract all searchable text from event
        texts = _extract_searchable_text(ex_event)

        if texts:
            # First: Check if this is an important error we should NEVER filter
            if _has_important_patterns(texts, IMPORTANT_PATTERNS):
                return ex_event

            # Auth token exceptions (by exception type name or in message text)
            if any(
                "invalidbearertokenexception" in t or "invalidjwtexception" in t for t in texts
            ):
                return None

            # Known-noise substrings (network, CSRF/session, 4xx phrases, infrastructure)
            if _should_drop_by_patterns(texts, EXCLUDE_PATTERNS):
                return None

            # HTTP 4xx status codes in error messages (regex matching)
            if _matches_http_4xx_pattern(texts):
                return None

        # HTTP 4xx status codes in event tags (for all requests). A None or
        # list-form `tags` value is handled instead of aborting the filter.
        if _has_4xx_status_tag(ex_event.get("tags")):
            return None

        # Browser-specific errors
        if "platform" in ex_event:
            platform = str(ex_event.get("platform", "")).lower()
            if platform in ("javascript", "browser"):
                # Filter out common browser errors that are not server-side issues
                if texts and _should_drop_by_patterns(texts, BROWSER_ERROR_PATTERNS):
                    return None
    except Exception:
        # Never break error reporting from the filter: keep the event. Log at
        # DEBUG only, which is below the levels the Sentry logging integration
        # is configured with in this module, so this cannot feed back into Sentry.
        logger.debug(
            "Sentry before_send filter failed; keeping event unfiltered", exc_info=True
        )

    return ex_event
import features
class FakeSentryClient:
    """No-op client used when real Sentry reporting is not configured."""

    def captureException(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def user_context(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None
class FakeSentry:
    """Placeholder Sentry object whose ``client`` is a no-op FakeSentryClient."""

    def __init__(self):
        # Expose the same `.client` attribute shape, backed by the no-op client.
        self.client = FakeSentryClient()
class Sentry(object):
    """
    Flask-extension style wrapper that configures Sentry error reporting for an app.

    Attributes that are not found on the wrapper itself are looked up on
    ``state`` (whatever ``init_app`` returned); names that cannot be resolved
    evaluate to None instead of raising.
    """

    def __init__(self, app=None):
        """Store ``app`` and, when one is given, initialize reporting for it."""
        self.app = app
        self.state = self.init_app(app) if app is not None else None

    def init_app(self, app):
        """
        Initialize Sentry for ``app`` according to its configuration.

        Reads ``EXCEPTION_LOG_TYPE`` (default "FakeSentry") and ``SENTRY_DSN``
        from ``app.config``. Real Sentry is only initialized when the type is
        "Sentry" and a DSN is set; in every other case, and when initialization
        raises, a ``FakeSentry`` is used instead.

        Args:
            app: Application object exposing a ``config`` mapping

        Returns:
            The object registered as ``app.extensions["sentry"]``: the value
            returned by ``sentry_sdk.init`` on success, otherwise a FakeSentry.
        """
        sentry_type = app.config.get("EXCEPTION_LOG_TYPE", "FakeSentry")

        if sentry_type == "Sentry":
            sentry_dsn = app.config.get("SENTRY_DSN", "")
            if sentry_dsn:
                try:
                    # Only a short DSN prefix is logged.
                    logger.info("Initializing Sentry with DSN: %s...", sentry_dsn[:10])

                    # Always include logging integration
                    integrations = [
                        LoggingIntegration(level=logging.INFO, event_level=logging.ERROR)
                    ]

                    # Only add Flask, SQLAlchemy and Stdlib integrations if OpenTelemetry
                    # is not enabled, to avoid conflicts with OpenTelemetry instrumentors
                    if not getattr(features, "OTEL_TRACING", False):
                        integrations.extend(
                            [
                                FlaskIntegration(transaction_style="endpoint"),
                                SqlalchemyIntegration(),
                                StdlibIntegration(),
                            ]
                        )
                        logger.info(
                            "Sentry initialized with full integrations (Flask, SQLAlchemy, Stdlib)"
                        )
                    else:
                        # When OTEL is enabled, use minimal integrations to avoid conflicts
                        logger.info("OpenTelemetry enabled - using minimal Sentry integrations")

                    # NOTE(review): what `sentry_sdk.init()` returns depends on the
                    # installed sentry-sdk version (it may be None) - confirm that
                    # consumers of app.extensions["sentry"] handle that value.
                    initialized_sentry = sentry_sdk.init(
                        dsn=sentry_dsn,
                        environment=app.config.get("SENTRY_ENVIRONMENT", "production"),
                        traces_sample_rate=app.config.get("SENTRY_TRACES_SAMPLE_RATE", 0.1),
                        profiles_sample_rate=app.config.get("SENTRY_PROFILES_SAMPLE_RATE", 0.1),
                        sample_rate=app.config.get("SENTRY_SAMPLE_RATE", 0.1),
                        integrations=integrations,
                        default_integrations=False,
                        auto_session_tracking=True,
                        before_send=_sentry_before_send_ignore_known,
                    )
                    sentry = initialized_sentry
                    logger.info("Sentry initialization completed successfully")
                except Exception as e:
                    # Fall back to the no-op implementation rather than failing app startup.
                    logger.error("Failed to initialize Sentry: %s", str(e), exc_info=True)
                    sentry = FakeSentry()
            else:
                sentry = FakeSentry()
        else:
            sentry = FakeSentry()

        # register extension with app
        app.extensions = getattr(app, "extensions", {})
        app.extensions["sentry"] = sentry
        return sentry

    def __getattr__(self, name):
        """
        Forward unknown attribute lookups to ``state``.

        Returns None when there is no state or the state lacks the attribute.
        Dunder names raise AttributeError so that protocol probes such as
        ``hasattr(obj, "__setstate__")`` (used by copy/pickle) behave normally.
        """
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        # Read `state` from the instance dict directly: going through
        # `self.state` would re-enter __getattr__ and recurse without bound
        # whenever `state` has not been assigned yet.
        state = self.__dict__.get("state")
        if state is None:
            return None
        return getattr(state, name, None)