1
0
mirror of https://github.com/quay/quay.git synced 2026-01-26 06:21:37 +03:00

Have the RepositoryActionCount worker clean up old rows in RAC (#274)

We don't make use of any action counts older than a year, so this
change will have the worker remove old rows, one month (roughly) at
a time.
This commit is contained in:
Joseph Schorr
2020-03-18 17:00:14 -04:00
committed by GitHub
parent 7b827d953c
commit 8701577cff
2 changed files with 41 additions and 2 deletions

View File

@@ -31,6 +31,8 @@ SEARCH_BUCKETS = [
search_bucket(timedelta(days=183), 152, 0.71028),
]
RAC_RETENTION_PERIOD = timedelta(days=365)
def find_uncounted_repository():
"""
@@ -157,3 +159,28 @@ def update_repository_score(repo):
except IntegrityError:
logger.debug("RepositorySearchScore row already existed; skipping")
return False
def delete_expired_entries(repo, limit=100):
    """
    Deletes expired entries from the RepositoryActionCount table for a specific repository.

    An entry is considered expired when its date is earlier than the current time minus
    RAC_RETENTION_PERIOD. At most `limit` entries are selected (and therefore removed) per
    call, so a caller that wants to clear everything should call this repeatedly until it
    returns 0.

    Returns the number of entries removed.
    """
    threshold_date = datetime.today() - RAC_RETENTION_PERIOD

    # Load one bounded batch up front; `limit` caps the batch size so a repository with a
    # long history is cleaned up incrementally rather than in one large pass.
    found = list(
        RepositoryActionCount.select()
        .where(
            RepositoryActionCount.repository == repo,
            RepositoryActionCount.date < threshold_date,
        )
        .limit(limit)
    )

    count_removed = 0
    for entry in found:
        try:
            entry.delete_instance(recursive=False)
        except IntegrityError:
            # This row could not be removed; skip it and keep going with the rest.
            continue
        count_removed += 1

    return count_removed

View File

@@ -24,19 +24,22 @@ class RepositoryActionCountWorker(Worker):
"""
Counts actions and aggregates search scores for a random repository for the previous day.
"""
# Select a repository that needs its actions for the last day updated.
to_count = model.repositoryactioncount.find_uncounted_repository()
if to_count is None:
logger.debug("No further repositories to count")
return False
yesterday = date.today() - timedelta(days=1)
logger.debug("Found repository #%s to count", to_count.id)
# Count the number of actions that occurred yesterday for the repository.
yesterday = date.today() - timedelta(days=1)
daily_count = logs_model.count_repository_actions(to_count, yesterday)
if daily_count is None:
logger.debug("Could not load count for repository #%s", to_count.id)
return False
# Store the count for the repository.
was_counted = model.repositoryactioncount.store_repository_action_count(
to_count, yesterday, daily_count
)
@@ -44,6 +47,7 @@ class RepositoryActionCountWorker(Worker):
logger.debug("Repository #%s was counted by another worker", to_count.id)
return False
# Update the search score for the repository now that its actions have been counted.
logger.debug("Updating search score for repository #%s", to_count.id)
was_updated = model.repositoryactioncount.update_repository_score(to_count)
if not was_updated:
@@ -53,6 +57,14 @@ class RepositoryActionCountWorker(Worker):
return False
logger.debug("Repository #%s search score updated", to_count.id)
# Delete any entries older than the retention period for the repository.
while True:
found = model.repositoryactioncount.delete_expired_entries(to_count, 30)
if found <= 0:
break
logger.debug("Repository #%s old entries removed", to_count.id)
return True