quay/data/model/_basequery.py
Kurtis Mullins 38be6d05d0 Python 3 (#153)
* Convert all Python2 to Python3 syntax.

* Removes oauth2lib dependency

* Replace mockredis with fakeredis

* byte/str conversions

* Removes nonexistent __nonzero__ in Python3

* Python3 Dockerfile and related

* [PROJQUAY-98] Replace resumablehashlib with rehash

* PROJQUAY-123 - replace gpgme with python3-gpg

* [PROJQUAY-135] Fix unhashable class error

* Update external dependencies for Python 3

- Move github.com/app-registry/appr to github.com/quay/appr
- github.com/coderanger/supervisor-stdout
- github.com/DevTable/container-cloud-config
- Update to latest mockldap with changes applied from coreos/mockldap
- Update dependencies in requirements.txt and requirements-dev.txt

* Default FLOAT_REPR function to str in json encoder and removes keyword assignment

True, False, and str were not keywords in Python2...

* [PROJQUAY-165] Replace package `bencode` with `bencode.py`

- Bencode is not compatible with Python 3.x and is no longer
  maintained. Bencode.py appears to be a drop-in replacement/fork
  that is compatible with Python 3.

* Make sure monkey.patch is called before anything else

* Removes anunidecode dependency and replaces it with text_unidecode

* Base64 encode/decode pickle dumps/loads when storing value in DB

Base64 encodes/decodes the serialized values when storing them in the
DB. Also make sure to return a Python 3 str instead of bytes when
coercing for the DB; otherwise, Postgres' TEXT field will convert the
value into a hex representation when storing it.
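
A minimal sketch of the round-trip (the `db_value`/`python_value` names mirror peewee's field-coercion hooks and are illustrative, not the actual field class):

```python
import base64
import pickle


def db_value(value):
    # Pickle, then base64-encode, and return a str (not bytes) so that
    # Postgres' TEXT column stores plain text instead of a hex representation.
    return base64.b64encode(pickle.dumps(value)).decode("ascii")


def python_value(stored):
    # Reverse the coercion when loading the value back out of the DB.
    return pickle.loads(base64.b64decode(stored))
```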

* Implement __hash__ on Digest class

In Python 3, if a class defines __eq__() but not __hash__(), its
instances will not be usable as items in hashable collections (e.g. sets).
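
For example, a minimal sketch (not the actual Digest class):

```python
class Digest(object):
    def __init__(self, hash_alg, hash_bytes):
        self._hash_alg = hash_alg
        self._hash_bytes = hash_bytes

    def __eq__(self, other):
        return (self._hash_alg, self._hash_bytes) == (other._hash_alg, other._hash_bytes)

    def __hash__(self):
        # Python 3 sets __hash__ to None when __eq__ is defined, so it must
        # be restored explicitly for instances to be usable in sets/dicts.
        return hash((self._hash_alg, self._hash_bytes))
```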

* Remove basestring check

* Fix expected message in credentials tests

* Fix usage of Cryptography.Fernet for Python3 (#219)

- Specifically, this addresses the issue where Byte<->String
  conversions weren't being applied correctly.
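
Fernet operates on bytes end to end, so str values have to be encoded before encryption and the decrypted bytes decoded back into a str; a minimal sketch:

```python
from cryptography.fernet import Fernet

fernet = Fernet(Fernet.generate_key())

# encrypt() takes bytes and returns bytes, so encode the str first...
token = fernet.encrypt("some secret".encode("utf-8"))

# ...and decode the decrypted bytes back into a str.
assert fernet.decrypt(token).decode("utf-8") == "some secret"
```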

* Fix utils

- tar+stream layer format utils
- filelike util

* Fix storage tests

* Fix endpoint tests

* Fix workers tests

* Fix docker's empty layer bytes

* Fix registry tests

* Appr

* Enable CI for Python 3.6

* Skip buildman tests

Skip buildman tests while it's being rewritten to allow ci to pass.

* Install swig for CI

* Update expected exception type in redis validation test

* Fix gpg signing calls

Fix gpg calls for updated gpg wrapper, and add signing tests.

* Convert / to // for Python3 integer division
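
In Python 3, `/` between two ints performs true division and returns a float, so call sites that relied on Python 2's floored int division need `//`:

```python
assert 7 / 2 == 3.5  # Python 3: true division, even for ints
assert 7 // 2 == 3   # floor division, matching Python 2's 7 / 2
```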

* WIP: Update buildman to use asyncio instead of trollius.

This dependency is considered deprecated/abandoned and was only used
as an implementation/backport of asyncio on Python 2.x. This is a work
in progress, and is included in the PR just to get the rest of the
tests passing; the builder is actually being rewritten.
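
The mechanical part of the conversion swaps trollius' generator-based coroutines for native syntax; roughly (a sketch with illustrative names, not the buildman code itself):

```python
# trollius (the Python 2 backport) spelled coroutines as generators:
#
#     @trollius.coroutine
#     def fetch_status(client, build_id):
#         result = yield trollius.From(client.status(build_id))
#         raise trollius.Return(result)
#
# The native asyncio equivalent:
async def fetch_status(client, build_id):
    return await client.status(build_id)
```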

* Target Python 3.8

* Removes unused files

- Removes unused files that were added accidentally while rebasing
- Small fixes/cleanup
- TODO tasks comments

* Add TODO to verify rehash backward compat with resumablehashlib

* Revert "[PROJQUAY-135] Fix unhashable class error" and implements __hash__ instead.

This reverts commit 735e38e3c1d072bf50ea864bc7e119a55d3a8976.
Instead, defines __hash__ for the encrypted fields class, using the
parent field's implementation.
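
Schematically (the field name is illustrative), assuming the encrypted field subclasses a peewee field:

```python
from peewee import CharField


class EncryptedCharField(CharField):
    def __eq__(self, other):
        ...  # custom equality for encrypted values

    # Defining __eq__ implicitly sets __hash__ to None in Python 3, so
    # point it back at the parent field's implementation.
    __hash__ = CharField.__hash__
```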

* Remove some unused files and imports

Co-authored-by: Kenny Lee Sin Cheong <kenny.lee@redhat.com>
Co-authored-by: Tom McKay <thomasmckay@redhat.com>
2020-06-05 16:50:13 -04:00

236 lines · 7.1 KiB · Python

import logging

from peewee import fn, PeeweeException
from cachetools.func import lru_cache
from datetime import datetime, timedelta
from functools import reduce

from data.model import DataModelException, config
from data.readreplica import ReadOnlyModeException
from data.database import (
    Repository,
    RepositoryState,
    User,
    Team,
    TeamMember,
    RepositoryPermission,
    TeamRole,
    Namespace,
    Visibility,
    ImageStorage,
    Image,
    RepositoryKind,
    db_for_update,
    db_count_estimator,
    db,
)

logger = logging.getLogger(__name__)


def reduce_as_tree(queries_to_reduce):
    """
    This method will split a list of queries into halves recursively until we reach individual
    queries, at which point it will start unioning the queries, or the already unioned subqueries.

    This works around a bug in peewee SQL generation where reducing linearly generates a chain of
    queries that will exceed the recursion depth limit when it has around 80 queries.
    """
    mid = len(queries_to_reduce) // 2
    left = queries_to_reduce[:mid]
    right = queries_to_reduce[mid:]

    to_reduce_right = right[0]
    if len(right) > 1:
        to_reduce_right = reduce_as_tree(right)

    if len(left) > 1:
        to_reduce_left = reduce_as_tree(left)
    elif len(left) == 1:
        to_reduce_left = left[0]
    else:
        return to_reduce_right

    return to_reduce_left.union_all(to_reduce_right)


def get_existing_repository(namespace_name, repository_name, for_update=False, kind_filter=None):
    query = (
        Repository.select(Repository, Namespace)
        .join(Namespace, on=(Repository.namespace_user == Namespace.id))
        .where(Namespace.username == namespace_name, Repository.name == repository_name)
        .where(Repository.state != RepositoryState.MARKED_FOR_DELETION)
    )

    if kind_filter:
        query = (
            query.switch(Repository).join(RepositoryKind).where(RepositoryKind.name == kind_filter)
        )

    if for_update:
        query = db_for_update(query)

    return query.get()


@lru_cache(maxsize=1)
def get_public_repo_visibility():
    return Visibility.get(name="public")


def _lookup_team_role(name):
    return _lookup_team_roles()[name]


@lru_cache(maxsize=1)
def _lookup_team_roles():
    return {role.name: role for role in TeamRole.select()}


def filter_to_repos_for_user(
    query, user_id=None, namespace=None, repo_kind="image", include_public=True, start_id=None
):
    if not include_public and not user_id:
        return Repository.select().where(Repository.id == "-1")

    # Filter on the type of repository.
    if repo_kind is not None:
        try:
            query = query.where(Repository.kind == Repository.kind.get_id(repo_kind))
        except RepositoryKind.DoesNotExist:
            raise DataModelException("Unknown repository kind")

    # Add the start ID if necessary.
    if start_id is not None:
        query = query.where(Repository.id >= start_id)

    # Add a namespace filter if necessary.
    if namespace:
        query = query.where(Namespace.username == namespace)

    # Build a set of queries that, when unioned together, return the full set of visible
    # repositories for the filters specified.
    queries = []

    if include_public:
        queries.append(query.where(Repository.visibility == get_public_repo_visibility()))

    if user_id is not None:
        AdminTeam = Team.alias()
        AdminTeamMember = TeamMember.alias()

        # Add repositories in which the user has permission.
        queries.append(
            query.switch(RepositoryPermission).where(RepositoryPermission.user == user_id)
        )

        # Add repositories in which the user is a member of a team that has permission.
        queries.append(
            query.switch(RepositoryPermission)
            .join(Team)
            .join(TeamMember)
            .where(TeamMember.user == user_id)
        )

        # Add repositories under namespaces in which the user is the org admin.
        queries.append(
            query.switch(Repository)
            .join(AdminTeam, on=(Repository.namespace_user == AdminTeam.organization))
            .join(AdminTeamMember, on=(AdminTeam.id == AdminTeamMember.team))
            .where(AdminTeam.role == _lookup_team_role("admin"))
            .where(AdminTeamMember.user == user_id)
        )

    return reduce(lambda l, r: l | r, queries)


def get_user_organizations(username):
    UserAlias = User.alias()
    return (
        User.select()
        .distinct()
        .join(Team)
        .join(TeamMember)
        .join(UserAlias, on=(UserAlias.id == TeamMember.user))
        .where(User.organization == True, UserAlias.username == username)
    )


def calculate_image_aggregate_size(ancestors_str, image_size, parent_image):
    ancestors = ancestors_str.split("/")[1:-1]
    if not ancestors:
        return image_size

    if parent_image is None:
        raise DataModelException("Could not load parent image")

    ancestor_size = parent_image.aggregate_size
    if ancestor_size is not None:
        return ancestor_size + image_size

    # Fall back to a slower path if the parent doesn't have an aggregate size saved.
    # TODO: remove this code if/when we do a full backfill.
    ancestor_size = (
        ImageStorage.select(fn.Sum(ImageStorage.image_size))
        .join(Image)
        .where(Image.id << ancestors)
        .scalar()
    )
    if ancestor_size is None:
        return None

    return ancestor_size + image_size


def update_last_accessed(token_or_user):
    """
    Updates the `last_accessed` field on the given token or user.

    If the existing field's value is within the configured threshold, the update is skipped.
    """
    if not config.app_config.get("FEATURE_USER_LAST_ACCESSED"):
        return

    threshold = timedelta(seconds=config.app_config.get("LAST_ACCESSED_UPDATE_THRESHOLD_S", 120))
    if (
        token_or_user.last_accessed is not None
        and datetime.utcnow() - token_or_user.last_accessed < threshold
    ):
        # Skip updating, as we don't want to put undue pressure on the database.
        return

    model_class = token_or_user.__class__
    last_accessed = datetime.utcnow()

    try:
        (
            model_class.update(last_accessed=last_accessed)
            .where(model_class.id == token_or_user.id)
            .execute()
        )
        token_or_user.last_accessed = last_accessed
    except ReadOnlyModeException:
        pass
    except PeeweeException as ex:
        # If there is any form of DB exception, only fail if strict logging is enabled.
        strict_logging_disabled = config.app_config.get("ALLOW_PULLS_WITHOUT_STRICT_LOGGING")
        if strict_logging_disabled:
            data = {
                "exception": ex,
                "token_or_user": token_or_user.id,
                "class": str(model_class),
            }

            logger.exception("update last_accessed for token/user failed", extra=data)
        else:
            raise


def estimated_row_count(model_cls):
    """
    Returns the estimated number of rows in the given model.

    If available, uses engine-specific estimation (which is very fast) and otherwise falls
    back to .count()
    """
    return db_count_estimator(model_cls, db)