mirror of
https://github.com/quay/quay.git
synced 2026-01-26 06:21:37 +03:00
* allow to disable strict logging in general Signed-off-by: dmesser <dmesser@redhat.com> * add strict logging exceptions for log kinds during reads Signed-off-by: dmesser <dmesser@redhat.com> * add strict logging exceptions for log kinds during reads Signed-off-by: dmesser <dmesser@redhat.com> * formatting Signed-off-by: dmesser <dmesser@redhat.com> --------- Signed-off-by: dmesser <dmesser@redhat.com>
414 lines
11 KiB
Python
414 lines
11 KiB
Python
import json
|
|
import logging
|
|
from calendar import timegm
|
|
from datetime import datetime, timedelta
|
|
|
|
from cachetools.func import lru_cache
|
|
from peewee import JOIN, PeeweeException, fn
|
|
|
|
from data.database import LogEntry3, LogEntryKind, RepositoryActionCount, User, db
|
|
from data.model import DataModelException, config, user
|
|
|
|
logger = logging.getLogger(__name__)

# Log kinds that log_action() is allowed to drop (log the failure instead of
# raising) when the ALLOW_PULLS_WITHOUT_STRICT_LOGGING config flag is set.
ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = [
    "pull_repo",
    "login_failure",
    "pull_repo_failed",
    "login_success",
    "logout_success",
]
|
|
|
|
|
|
def _logs_query(
    selections,
    start_time=None,
    end_time=None,
    performer=None,
    repository=None,
    namespace=None,
    ignore=None,
    model=LogEntry3,
    id_range=None,
    namespace_id=None,
):
    """
    Builds the base select over `model` used by the log lookups in this module.

    The rows are bounded either by `id_range` (inclusive on both ends) or, when no ID range is
    given, by the half-open datetime window [start_time, end_time). Optional filters narrow the
    result by repository, performer, account (via `namespace` name or `namespace_id`) and by
    excluding the log kinds named in `ignore`.

    Raises DataModelException when `namespace` does not resolve to a user or organization.
    """
    # A namespace may be given by name or by ID, never both.
    assert namespace is None or namespace_id is None

    has_time_window = start_time is not None and end_time is not None
    assert has_time_window or (id_range is not None)

    if id_range is None:
        bounds = (model.datetime >= start_time, model.datetime < end_time)
    else:
        bounds = (model.id >= id_range[0], model.id <= id_range[1])

    query = model.select(*selections).switch(model).where(*bounds)

    if repository:
        query = query.where(model.repository == repository)

    if performer:
        query = query.where(model.performer == performer)

    # Account filters are skipped whenever a repository filter is in effect.
    if not repository:
        if namespace:
            owner = user.get_user_or_org(namespace)
            if owner is None:
                raise DataModelException("Invalid namespace requested: %s" % namespace)

            query = query.where(model.account == owner.id)

        if namespace_id is not None:
            query = query.where(model.account == namespace_id)

    if ignore:
        kinds = get_log_entry_kinds()
        excluded_kind_ids = [kinds[excluded_name] for excluded_name in ignore]
        query = query.where(~(model.kind << excluded_kind_ids))

    return query
|
|
|
|
|
|
def _latest_logs_query(
    selections,
    performer=None,
    repository=None,
    namespace=None,
    ignore=None,
    model=LogEntry3,
    size=None,
):
    """
    Returns a query for selecting the latest logs from the table, with various options and filters.

    Rows are ordered by datetime descending (then by id) and, when `size` is truthy, limited to
    that many rows. Optional filters narrow the result by repository, performer, namespace
    (only applied when no repository is given) and by excluding the log kinds named in `ignore`.

    Raises DataModelException when `namespace` does not resolve to a user or organization.
    """
    query = model.select(*selections).switch(model)

    if repository:
        query = query.where(model.repository == repository)

    if performer:
        # Filter on the performer column; the previous code re-applied the repository
        # filter here, so the performer argument had no effect.
        query = query.where(model.performer == performer)

    if namespace and not repository:
        namespace_user = user.get_user_or_org(namespace)
        if namespace_user is None:
            raise DataModelException("Invalid namespace requested")

        query = query.where(model.account == namespace_user.id)

    if ignore:
        kind_map = get_log_entry_kinds()
        ignore_ids = [kind_map[kind_name] for kind_name in ignore]
        query = query.where(~(model.kind << ignore_ids))

    query = query.order_by(model.datetime.desc(), model.id)

    if size:
        query = query.limit(size)

    return query
|
|
|
|
|
|
@lru_cache(maxsize=1)
def get_log_entry_kinds():
    """
    Returns a two-way lookup over all LogEntryKind rows: kind id -> kind name and
    kind name -> kind id, in a single dict. The result is cached by lru_cache.
    """
    lookup = {}
    for entry_kind in LogEntryKind.select():
        lookup.update({entry_kind.id: entry_kind.name, entry_kind.name: entry_kind.id})

    return lookup
|
|
|
|
|
|
def _get_log_entry_kind(name):
    """
    Looks up `name` in the cached log-entry-kind map and returns the mapped value.

    Raises KeyError when `name` is not present in the map.
    """
    return get_log_entry_kinds()[name]
|
|
|
|
|
|
def get_aggregated_logs(
    start_time,
    end_time,
    performer=None,
    repository=None,
    namespace=None,
    ignore=None,
    model=LogEntry3,
):
    """
    Returns a query yielding, per log kind and per day, the number of log entries that fall in
    [start_time, end_time) and match the given filters.
    """
    day = db.extract_date("day", model.datetime)
    columns = [model.kind, day.alias("day"), fn.Count(model.id).alias("count")]

    filtered = _logs_query(
        columns,
        start_time=start_time,
        end_time=end_time,
        performer=performer,
        repository=repository,
        namespace=namespace,
        ignore=ignore,
        model=model,
    )
    return filtered.group_by(day, model.kind)
|
|
|
|
|
|
def get_logs_query(
    start_time=None,
    end_time=None,
    performer=None,
    repository=None,
    namespace=None,
    namespace_id=None,
    ignore=None,
    model=LogEntry3,
    id_range=None,
):
    """
    Returns a query for the logs matching the given filters, left-outer-joined to the
    performing user. The owning account is selected and joined as well, but only when the
    caller passes none of namespace, namespace_id and repository.
    """
    performer_alias = User.alias()
    account_alias = User.alias()

    include_account = namespace is None and repository is None and namespace_id is None

    selections = [model, performer_alias]
    if include_account:
        selections.append(account_alias)

    query = _logs_query(
        selections,
        start_time,
        end_time,
        performer,
        repository,
        namespace,
        ignore,
        model=model,
        id_range=id_range,
        namespace_id=namespace_id,
    )

    performer_join = (model.performer == performer_alias.id).alias("performer")
    query = query.switch(model).join(performer_alias, JOIN.LEFT_OUTER, on=performer_join)

    if include_account:
        account_join = (model.account == account_alias.id).alias("account")
        query = query.switch(model).join(account_alias, JOIN.LEFT_OUTER, on=account_join)

    return query
|
|
|
|
|
|
def get_latest_logs_query(
    performer=None, repository=None, namespace=None, ignore=None, model=LogEntry3, size=None
):
    """
    Returns a query for the most recent logs matching the given filters, left-outer-joined to
    the performing user. The owning account is selected and joined as well, but only when
    neither namespace nor repository is given.
    """
    performer_alias = User.alias()
    account_alias = User.alias()

    include_account = namespace is None and repository is None

    selections = [model, performer_alias]
    if include_account:
        selections.append(account_alias)

    query = _latest_logs_query(
        selections, performer, repository, namespace, ignore, model=model, size=size
    )

    performer_join = (model.performer == performer_alias.id).alias("performer")
    query = query.switch(model).join(performer_alias, JOIN.LEFT_OUTER, on=performer_join)

    if include_account:
        account_join = (model.account == account_alias.id).alias("account")
        query = query.switch(model).join(account_alias, JOIN.LEFT_OUTER, on=account_join)

    return query
|
|
|
|
|
|
def _json_serialize(obj):
    """
    `default` hook for json.dumps: converts datetime values to integer epoch seconds.

    The datetime is converted via utctimetuple() and calendar.timegm(), so a naive datetime is
    interpreted as UTC.

    Raises TypeError for any other type. json.dumps only calls this hook for objects it cannot
    encode itself, and the hook must either return an encodable value or raise TypeError.
    Returning the object unchanged (the previous behaviour) made the encoder fail with a
    misleading "Circular reference detected" ValueError.
    """
    if isinstance(obj, datetime):
        return timegm(obj.utctimetuple())

    raise TypeError("Object of type %s is not JSON serializable" % type(obj).__name__)
|
|
|
|
|
|
def log_action(
    kind_name,
    user_or_organization_name,
    performer=None,
    repository=None,
    ip=None,
    metadata={},
    timestamp=None,
):
    """
    Logs an entry in the LogEntry table.

    kind_name: name of the log kind; resolved to its id via the cached kind map (KeyError if
        unknown).
    user_or_organization_name: username of the owning account. When None, the account falls
        back to the SERVICE_LOG_ACCOUNT_ID config value and, if that is unset, to
        user.get_minimum_user_id().
    performer / repository: optional objects whose `.id` is stored.
    ip: stored as given.
    metadata: serialized with json.dumps; datetime values are handled by _json_serialize.
    timestamp: entry datetime; defaults to datetime.today() when falsy.

    If writing the row raises a PeeweeException, the error is re-raised unless strict logging
    is disabled: either ALLOW_WITHOUT_STRICT_LOGGING is set, or
    ALLOW_PULLS_WITHOUT_STRICT_LOGGING is set and kind_name is listed in
    ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING. In that case the failure is only logged.
    """
    # NOTE: the shared `{}` default for `metadata` is only read (serialized) below, never
    # mutated, so it is left in place to keep the signature unchanged.
    if not timestamp:
        timestamp = datetime.today()

    account = None
    if user_or_organization_name is not None:
        account = User.get(User.username == user_or_organization_name).id
    else:
        account = config.app_config.get("SERVICE_LOG_ACCOUNT_ID")
        if account is None:
            account = user.get_minimum_user_id()

    if performer is not None:
        performer = performer.id

    if repository is not None:
        repository = repository.id

    kind = _get_log_entry_kind(kind_name)
    metadata_json = json.dumps(metadata, default=_json_serialize)
    log_data = {
        "kind": kind,
        "account": account,
        "performer": performer,
        "repository": repository,
        "ip": ip,
        "metadata_json": metadata_json,
        "datetime": timestamp,
    }

    try:
        LogEntry3.create(**log_data)
    except PeeweeException as ex:
        strict_logging_disabled = config.app_config.get("ALLOW_WITHOUT_STRICT_LOGGING") or (
            config.app_config.get("ALLOW_PULLS_WITHOUT_STRICT_LOGGING")
            and kind_name in ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING
        )
        if strict_logging_disabled:
            # Build the merged dict explicitly: dict.update() returns None, so the previous
            # `{...}.update(log_data)` expression passed extra=None and dropped the entry data.
            logger.exception("log_action failed", extra=dict(log_data, exception=ex))
        else:
            raise
|
|
|
|
|
|
def get_stale_logs_start_id(model):
    """
    Returns the smallest id stored in `model`'s table (the first column of the first row of a
    MIN(id) query), or None if indexing the result raises IndexError.
    """
    try:
        min_id_rows = model.select(fn.Min(model.id)).tuples()
        first_row = min_id_rows[0]
        return first_row[0]
    except IndexError:
        return None
|
|
|
|
|
|
def get_stale_logs(start_id, end_id, model, cutoff_date):
    """
    Returns a query for the logs whose id lies in [start_id, end_id] (both inclusive) and whose
    datetime is on or before cutoff_date.
    """
    in_id_range = (model.id >= start_id, model.id <= end_id)
    not_after_cutoff = model.datetime <= cutoff_date
    return model.select().where(*in_id_range, not_after_cutoff)
|
|
|
|
|
|
def delete_stale_logs(start_id, end_id, model):
    """
    Executes a delete of every log row whose id lies in [start_id, end_id], both inclusive.
    """
    in_id_range = (model.id >= start_id, model.id <= end_id)
    deletion = model.delete().where(*in_id_range)
    deletion.execute()
|
|
|
|
|
|
def get_repository_action_counts(repo, start_date):
    """
    Returns a query for the RepositoryActionCount rows of the given repository whose date is on
    or after start_date.
    """
    for_repository = RepositoryActionCount.repository == repo
    since_start = RepositoryActionCount.date >= start_date
    return RepositoryActionCount.select().where(for_repository, since_start)
|
|
|
|
|
|
def get_repositories_action_sums(repository_ids):
    """
    Returns a map from repository ID to total actions within that repository in the last week.

    Only repositories that have at least one RepositoryActionCount row dated within the last
    week appear in the result. An empty or None `repository_ids` returns an empty dict without
    querying.
    """
    if not repository_ids:
        return {}

    # Filter the join to recent entries only.
    last_week = datetime.now() - timedelta(weeks=1)
    rows = (
        RepositoryActionCount.select(
            RepositoryActionCount.repository, fn.Sum(RepositoryActionCount.count)
        )
        .where(RepositoryActionCount.repository << repository_ids)
        .where(RepositoryActionCount.date >= last_week)
        .group_by(RepositoryActionCount.repository)
        .tuples()
    )

    # Each row is (repository id, summed count).
    return {row[0]: row[1] for row in rows}
|
|
|
|
|
|
def get_minimum_id_for_logs(start_time, repository_id=None, namespace_id=None, model=LogEntry3):
    """
    Returns the smallest log id, for the given repository or namespace, among entries dated on
    or after start_time.
    """
    # Cheap attempt first: restrict the scan to the day following start_time. Most
    # repositories have an entry in that window, which makes for a much faster query.
    within_first_day = _get_bounded_id(
        fn.Min,
        model.datetime >= start_time,
        repository_id,
        namespace_id,
        reduction_clause=model.datetime < start_time + timedelta(days=1),
        model=model,
    )
    if within_first_day is None:
        # Nothing in the first day: fall back to the unbounded lookup.
        return _get_bounded_id(
            fn.Min, model.datetime >= start_time, repository_id, namespace_id, model=model
        )

    return within_first_day
|
|
|
|
|
|
def get_maximum_id_for_logs(end_time, repository_id=None, namespace_id=None, model=LogEntry3):
    """
    Returns the largest log id, for the given repository or namespace, among entries dated on
    or before end_time.
    """
    # Cheap attempt first: restrict the scan to the day preceding end_time. Most
    # repositories have an entry in that window, which makes for a much faster query.
    within_last_day = _get_bounded_id(
        fn.Max,
        model.datetime <= end_time,
        repository_id,
        namespace_id,
        reduction_clause=model.datetime > end_time - timedelta(days=1),
        model=model,
    )
    if within_last_day is None:
        # Nothing in the last day: fall back to the unbounded lookup.
        return _get_bounded_id(
            fn.Max, model.datetime <= end_time, repository_id, namespace_id, model=model
        )

    return within_last_day
|
|
|
|
|
|
def _get_bounded_id(
    fn, filter_clause, repository_id, namespace_id, reduction_clause=None, model=LogEntry3
):
    """
    Applies the aggregate `fn` (called as fn(model.id)) over the rows matching filter_clause,
    the optional reduction_clause, and either the repository (preferred when given) or the
    namespace account. Returns the first column of the first result row, or None when that row
    is empty.

    At least one of repository_id and namespace_id must be provided.
    """
    assert (namespace_id is not None) or (repository_id is not None)

    bounded = model.select(fn(model.id)).where(filter_clause)

    if reduction_clause is not None:
        bounded = bounded.where(reduction_clause)

    # The repository filter wins over the namespace filter when both are supplied.
    if repository_id is None:
        bounded = bounded.where(model.account == namespace_id)
    else:
        bounded = bounded.where(model.repository == repository_id)

    first_row = bounded.tuples()[0]
    return first_row[0] if first_row else None
|