mirror of
https://github.com/quay/quay.git
synced 2026-01-26 06:21:37 +03:00
* chore: drop deprecated tables and remove unused code * isort imports * migration: check for table existence before drop
200 lines
6.1 KiB
Python
200 lines
6.1 KiB
Python
import logging
|
|
from collections import namedtuple
|
|
from datetime import date, datetime, timedelta
|
|
|
|
from peewee import JOIN, IntegrityError
|
|
|
|
from data.database import (
|
|
LogEntry,
|
|
LogEntry2,
|
|
LogEntry3,
|
|
Repository,
|
|
RepositoryActionCount,
|
|
RepositorySearchScore,
|
|
db_random_func,
|
|
fn,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
search_bucket = namedtuple("search_bucket", ["delta", "days", "weight"])
|
|
|
|
# Defines the various buckets for search scoring. Each bucket is computed using the given time
|
|
# delta from today *minus the previous bucket's time period*. Once all the actions over the
|
|
# bucket's time period have been collected, they are multiplied by the given modifier. The modifiers
|
|
# for this bucket were determined via the integral of (2/((x/183)+1)^2)/183 over the period of days
|
|
# in the bucket; this integral over 0..183 has a sum of 1, so we get a good normalize score result.
|
|
SEARCH_BUCKETS = [
|
|
search_bucket(timedelta(days=1), 1, 0.010870),
|
|
search_bucket(timedelta(days=7), 6, 0.062815),
|
|
search_bucket(timedelta(days=31), 24, 0.21604),
|
|
search_bucket(timedelta(days=183), 152, 0.71028),
|
|
]
|
|
|
|
RAC_RETENTION_PERIOD = timedelta(days=365)
|
|
|
|
|
|
def count_repository_actions(to_count, day):
|
|
"""
|
|
Aggregates repository actions from the LogEntry table for the specified day.
|
|
|
|
Returns the count or None on error.
|
|
"""
|
|
# TODO: Clean this up a bit.
|
|
def lookup_action_count(model):
|
|
return (
|
|
model.select()
|
|
.where(
|
|
model.repository == to_count,
|
|
model.datetime >= day,
|
|
model.datetime < (day + timedelta(days=1)),
|
|
)
|
|
.count()
|
|
)
|
|
|
|
actions = (
|
|
lookup_action_count(LogEntry3)
|
|
+ lookup_action_count(LogEntry2)
|
|
+ lookup_action_count(LogEntry)
|
|
)
|
|
|
|
return actions
|
|
|
|
|
|
def found_entry_count(day):
|
|
"""
|
|
Returns the number of entries for the given day in the RAC table.
|
|
"""
|
|
return RepositoryActionCount.select().where(RepositoryActionCount.date == day).count()
|
|
|
|
|
|
def has_repository_action_count(repository, day):
|
|
"""
|
|
Returns whether there is a stored action count for a repository for a specific day.
|
|
"""
|
|
try:
|
|
RepositoryActionCount.get(repository=repository, date=day)
|
|
return True
|
|
except RepositoryActionCount.DoesNotExist:
|
|
return False
|
|
|
|
|
|
def store_repository_action_count(repository, day, action_count):
|
|
"""
|
|
Stores the action count for a repository for a specific day.
|
|
|
|
Returns False if the repository already has an entry for the specified day.
|
|
"""
|
|
try:
|
|
RepositoryActionCount.create(repository=repository, date=day, count=action_count)
|
|
return True
|
|
except IntegrityError:
|
|
logger.debug("Count already written for repository %s", repository.id)
|
|
return False
|
|
|
|
|
|
def update_repository_score(repo):
|
|
"""
|
|
Updates the repository score entry for the given table by retrieving information from the
|
|
RepositoryActionCount table.
|
|
|
|
Note that count_repository_actions for the repo should be called first. Returns True if the row
|
|
was updated and False otherwise.
|
|
"""
|
|
today = date.today()
|
|
|
|
# Retrieve the counts for each bucket and calculate the final score.
|
|
final_score = 0.0
|
|
last_end_timedelta = timedelta(days=0)
|
|
|
|
for bucket in SEARCH_BUCKETS:
|
|
start_date = today - bucket.delta
|
|
end_date = today - last_end_timedelta
|
|
last_end_timedelta = bucket.delta
|
|
|
|
query = RepositoryActionCount.select(
|
|
fn.Sum(RepositoryActionCount.count), fn.Count(RepositoryActionCount.id)
|
|
).where(
|
|
RepositoryActionCount.date >= start_date,
|
|
RepositoryActionCount.date < end_date,
|
|
RepositoryActionCount.repository == repo,
|
|
)
|
|
|
|
bucket_tuple = query.tuples()[0]
|
|
logger.debug(
|
|
"Got bucket tuple %s for bucket %s for repository %s", bucket_tuple, bucket, repo.id
|
|
)
|
|
|
|
if bucket_tuple[0] is None:
|
|
continue
|
|
|
|
bucket_sum = float(bucket_tuple[0])
|
|
bucket_count = int(bucket_tuple[1])
|
|
if not bucket_count:
|
|
continue
|
|
|
|
bucket_score = bucket_sum / (bucket_count * 1.0)
|
|
final_score += bucket_score * bucket.weight
|
|
|
|
# Update the existing repo search score row or create a new one.
|
|
normalized_score = int(final_score * 100.0)
|
|
try:
|
|
try:
|
|
search_score_row = RepositorySearchScore.get(repository=repo)
|
|
search_score_row.last_updated = datetime.now()
|
|
search_score_row.score = normalized_score
|
|
search_score_row.save()
|
|
return True
|
|
except RepositorySearchScore.DoesNotExist:
|
|
RepositorySearchScore.create(
|
|
repository=repo, score=normalized_score, last_updated=today
|
|
)
|
|
return True
|
|
except IntegrityError:
|
|
logger.debug("RepositorySearchScore row already existed; skipping")
|
|
return False
|
|
|
|
|
|
def missing_counts_query(date):
|
|
"""Returns a query to find all Repository's with missing RAC entries for the given date."""
|
|
subquery = (
|
|
RepositoryActionCount.select(RepositoryActionCount.id, RepositoryActionCount.repository)
|
|
.where(RepositoryActionCount.date == date)
|
|
.alias("rac")
|
|
)
|
|
|
|
return (
|
|
Repository.select()
|
|
.join(subquery, JOIN.LEFT_OUTER, on=(Repository.id == subquery.c.repository_id))
|
|
.where(subquery.c.id >> None)
|
|
)
|
|
|
|
|
|
def delete_expired_entries(repo, limit=50):
|
|
"""Deletes expired entries from the RepositoryActionCount table for a specific repository.
|
|
Returns the number of entries removed.
|
|
"""
|
|
threshold_date = datetime.today() - RAC_RETENTION_PERIOD
|
|
found = list(
|
|
RepositoryActionCount.select()
|
|
.where(
|
|
RepositoryActionCount.repository == repo, RepositoryActionCount.date < threshold_date
|
|
)
|
|
.limit(limit)
|
|
)
|
|
|
|
if not found:
|
|
return 0
|
|
|
|
assert len(found) <= limit
|
|
|
|
count_removed = 0
|
|
for entry in found:
|
|
try:
|
|
entry.delete_instance(recursive=False)
|
|
count_removed += 1
|
|
except IntegrityError:
|
|
continue
|
|
|
|
return count_removed
|