From f626d4ecebcd529b2d2c929cbef2b6646d97ff62 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 10 Aug 2020 12:20:12 -0400 Subject: [PATCH] Ensure shared blob layers are present on lookup (#511) Due to the requirement for the shared empty layer for manifest schema 1, we need to make sure it is written to the ImageStorage table, even if the only schemas pushed are version 2 Fixes https://issues.redhat.com/browse/PROJQUAY-948 --- data/registry_model/interface.py | 2 +- data/registry_model/registry_oci_model.py | 36 ++++++++++++++++------ data/registry_model/test/test_interface.py | 6 ++-- endpoints/v2/manifest.py | 2 +- util/secscan/test/test_blob_retriever.py | 2 +- 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index c70b2a7b7..5b38d55e5 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -269,7 +269,7 @@ class RegistryDataInterface(object): """ @abstractmethod - def get_manifest_local_blobs(self, manifest, include_placements=False): + def get_manifest_local_blobs(self, manifest, storage, include_placements=False): """ Returns the set of local blobs for the given manifest or None if none. """ diff --git a/data/registry_model/registry_oci_model.py b/data/registry_model/registry_oci_model.py index 3acbeaeab..5f5338083 100644 --- a/data/registry_model/registry_oci_model.py +++ b/data/registry_model/registry_oci_model.py @@ -9,6 +9,7 @@ from data import model from data.cache import cache_key from data.model import oci, DataModelException from data.model.oci.retriever import RepositoryContentRetriever +from data.readreplica import ReadOnlyModeException from data.database import ( db_transaction, Image, @@ -37,7 +38,7 @@ from image.docker.schema1 import ( DOCKER_SCHEMA1_CONTENT_TYPES, DockerSchema1ManifestBuilder, ) -from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST +from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES logger = logging.getLogger(__name__) @@ -617,7 +618,7 @@ class OCIModel(RegistryDataInterface): repository_ref._db_id, parsed_manifest, storage, include_placements=include_placements, ) - def get_manifest_local_blobs(self, manifest, include_placements=False): + def get_manifest_local_blobs(self, manifest, storage, include_placements=False): """ Returns the set of local blobs for the given manifest or None if none. """ @@ -627,7 +628,7 @@ class OCIModel(RegistryDataInterface): return None return self._get_manifest_local_blobs( - manifest, manifest_row.repository_id, include_placements + manifest, manifest_row.repository_id, include_placements, storage ) def find_repository_with_garbage(self, limit_to_gc_policy_s): @@ -889,7 +890,7 @@ class OCIModel(RegistryDataInterface): manifest_row, manifest.get_parsed_manifest(), storage ) - def _get_manifest_local_blobs(self, manifest, repo_id, include_placements=False): + def _get_manifest_local_blobs(self, manifest, repo_id, storage, include_placements=False): parsed = manifest.get_parsed_manifest() if parsed is None: return None @@ -898,7 +899,9 @@ class OCIModel(RegistryDataInterface): if not len(local_blob_digests): return [] - blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, local_blob_digests) + blob_query = self._lookup_repo_storages_by_content_checksum( + repo_id, local_blob_digests, storage + ) blobs = [] for image_storage in blob_query: placements = None @@ -932,7 +935,9 @@ class OCIModel(RegistryDataInterface): blob_digests.append(EMPTY_LAYER_BLOB_DIGEST) if blob_digests: - blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, blob_digests) + blob_query = self._lookup_repo_storages_by_content_checksum( + repo_id, blob_digests, storage + ) storage_map = {blob.content_checksum: blob for blob in blob_query} layers = parsed.get_layers(retriever) @@ -970,7 +975,7 @@ class OCIModel(RegistryDataInterface): return manifest_layers - def _get_shared_storage(self, blob_digest): + def _get_shared_storage(self, blob_digest, storage=None): """ Returns an ImageStorage row for the blob digest if it is a globally shared storage. """ @@ -979,17 +984,28 @@ class OCIModel(RegistryDataInterface): # can be incredibly slow, and, since it is defined as a globally shared layer, this is extra # work we don't need to do. if blob_digest == EMPTY_LAYER_BLOB_DIGEST: - return model.blob.get_shared_blob(EMPTY_LAYER_BLOB_DIGEST) + found = model.blob.get_shared_blob(EMPTY_LAYER_BLOB_DIGEST) + if found is None and storage is not None: + # If we have the storage and the shared blob was not found, then simply create + # it now. This will handle the case where the data was never pushed. + try: + return model.blob.get_or_create_shared_blob( + EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES + ) + except ReadOnlyModeException: + return None + + return found return None - def _lookup_repo_storages_by_content_checksum(self, repo, checksums): + def _lookup_repo_storages_by_content_checksum(self, repo, checksums, storage): checksums = set(checksums) # Load any shared storages first. extra_storages = [] for checksum in list(checksums): - shared_storage = self._get_shared_storage(checksum) + shared_storage = self._get_shared_storage(checksum, storage=storage) if shared_storage is not None: extra_storages.append(shared_storage) checksums.remove(checksum) diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index 71c29c8ba..deb14df70 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -467,7 +467,7 @@ def test_layers_and_blobs(repo_namespace, repo_name, registry_model): assert manifest_layer.estimated_size(1) is not None assert isinstance(manifest_layer.layer_info, ManifestImageLayer) - blobs = registry_model.get_manifest_local_blobs(manifest, include_placements=True) + blobs = registry_model.get_manifest_local_blobs(manifest, storage, include_placements=True) assert {b.digest for b in blobs} == set(parsed.local_blob_digests) @@ -563,7 +563,7 @@ def test_mount_blob_into_repository(registry_model): target_repository_ref = registry_model.lookup_repository("devtable", "complex") - blobs = registry_model.get_manifest_local_blobs(manifest, include_placements=True) + blobs = registry_model.get_manifest_local_blobs(manifest, storage, include_placements=True) assert blobs for blob in blobs: @@ -589,7 +589,7 @@ def test_get_cached_repo_blob(registry_model): latest_tag = registry_model.get_repo_tag(repository_ref, "latest") manifest = registry_model.get_manifest_for_tag(latest_tag) - blobs = registry_model.get_manifest_local_blobs(manifest, include_placements=True) + blobs = registry_model.get_manifest_local_blobs(manifest, storage, include_placements=True) assert blobs blob = blobs[0] diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index d1f2c987f..362d07d31 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -318,7 +318,7 @@ def _write_manifest_and_log(namespace_name, repo_name, tag_name, manifest_impl): # Queue all blob manifests for replication. if features.STORAGE_REPLICATION: - blobs = registry_model.get_manifest_local_blobs(manifest) + blobs = registry_model.get_manifest_local_blobs(manifest, storage) if blobs is None: logger.error("Could not lookup blobs for manifest `%s`", manifest.digest) else: diff --git a/util/secscan/test/test_blob_retriever.py b/util/secscan/test/test_blob_retriever.py index 5d050be58..c12f02611 100644 --- a/util/secscan/test/test_blob_retriever.py +++ b/util/secscan/test/test_blob_retriever.py @@ -17,7 +17,7 @@ def test_generate_url(initialized_db): repo_ref = registry_model.lookup_repository("devtable", "simple") tag = registry_model.get_repo_tag(repo_ref, "latest") manifest = tag.manifest - blobs = registry_model.get_manifest_local_blobs(manifest) + blobs = registry_model.get_manifest_local_blobs(manifest, storage) retriever = BlobURLRetriever(storage, instance_keys, application) headers = retriever.headers_for_download(repo_ref, blobs[0])