mirror of
https://github.com/quay/quay.git
synced 2026-01-26 06:21:37 +03:00
Have the RepositoryActionCount worker cleanup old rows in RAC (#274)
We don't make use of any action counts older than a year, so this change will have the worker remove old rows, one month (roughly) at a time
This commit is contained in:
@@ -31,6 +31,8 @@ SEARCH_BUCKETS = [
|
||||
search_bucket(timedelta(days=183), 152, 0.71028),
|
||||
]
|
||||
|
||||
RAC_RETENTION_PERIOD = timedelta(days=365)
|
||||
|
||||
|
||||
def find_uncounted_repository():
|
||||
"""
|
||||
@@ -157,3 +159,28 @@ def update_repository_score(repo):
|
||||
except IntegrityError:
|
||||
logger.debug("RepositorySearchScore row already existed; skipping")
|
||||
return False
|
||||
|
||||
|
||||
def delete_expired_entries(repo, limit=100):
|
||||
""" Deletes expired entries from the RepositoryActionCount table for a specific repository.
|
||||
Returns the number of entries removed.
|
||||
"""
|
||||
threshold_date = datetime.today() - RAC_RETENTION_PERIOD
|
||||
found = list(
|
||||
RepositoryActionCount.select().where(
|
||||
RepositoryActionCount.repository == repo, RepositoryActionCount.date < threshold_date
|
||||
)
|
||||
)
|
||||
|
||||
if not found:
|
||||
return 0
|
||||
|
||||
count_removed = 0
|
||||
for entry in found:
|
||||
try:
|
||||
entry.delete_instance(recursive=False)
|
||||
count_removed += 1
|
||||
except IntegrityError:
|
||||
continue
|
||||
|
||||
return count_removed
|
||||
|
||||
@@ -24,19 +24,22 @@ class RepositoryActionCountWorker(Worker):
|
||||
"""
|
||||
Counts actions and aggregates search scores for a random repository for the previous day.
|
||||
"""
|
||||
# Select a repository that needs its actions for the last day updated.
|
||||
to_count = model.repositoryactioncount.find_uncounted_repository()
|
||||
if to_count is None:
|
||||
logger.debug("No further repositories to count")
|
||||
return False
|
||||
|
||||
yesterday = date.today() - timedelta(days=1)
|
||||
|
||||
logger.debug("Found repository #%s to count", to_count.id)
|
||||
|
||||
# Count the number of actions that occurred yesterday for the repository.
|
||||
yesterday = date.today() - timedelta(days=1)
|
||||
daily_count = logs_model.count_repository_actions(to_count, yesterday)
|
||||
if daily_count is None:
|
||||
logger.debug("Could not load count for repository #%s", to_count.id)
|
||||
return False
|
||||
|
||||
# Store the count for the repository.
|
||||
was_counted = model.repositoryactioncount.store_repository_action_count(
|
||||
to_count, yesterday, daily_count
|
||||
)
|
||||
@@ -44,6 +47,7 @@ class RepositoryActionCountWorker(Worker):
|
||||
logger.debug("Repository #%s was counted by another worker", to_count.id)
|
||||
return False
|
||||
|
||||
# Update the search score for the repository now that its actions have been counted.
|
||||
logger.debug("Updating search score for repository #%s", to_count.id)
|
||||
was_updated = model.repositoryactioncount.update_repository_score(to_count)
|
||||
if not was_updated:
|
||||
@@ -53,6 +57,14 @@ class RepositoryActionCountWorker(Worker):
|
||||
return False
|
||||
|
||||
logger.debug("Repository #%s search score updated", to_count.id)
|
||||
|
||||
# Delete any entries older than the retention period for the repository.
|
||||
while True:
|
||||
found = model.repositoryactioncount.delete_expired_entries(to_count, 30)
|
||||
if found <= 0:
|
||||
break
|
||||
|
||||
logger.debug("Repository #%s old entries removed", to_count.id)
|
||||
return True
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user