From ac6a8d2f8b93bcff032d3b2dfc57e07432a96709 Mon Sep 17 00:00:00 2001 From: Flavian Missi Date: Tue, 10 May 2022 17:56:08 +0200 Subject: [PATCH] PROJQUAY-3750: support registries that do not return a digest header (#1310) The distribution spec does not require the docker-content-digest header to be set in response to a manifest GET/HEAD request. This changes both the proxy client and the registry proxy model to correctly check whether a manifest is up-to-date with the upstream registry or not when no digest header is received. NOTE: when checking staleness against registries that do not return the docker-content-digest header, Quay will make a GET request to the registry and calculate the digest from the manifest itself. GET requests usually count towards rate-limiting. This change also sets the accept-encoding header to 'identity'. The python requests library seems to automatically set the accept-encoding header to 'gzip'. Dockerhub ignores that header when serving blobs, but some registries don't (namely registry.access.redhat.com). When Quay receives a gzipped config blob (have not tested non-config blobs) for some reason it doesn't know how to handle it. I suspect it has to do with the fact that in this case the content-length header will differ from the actual size of the response body, so when Quay tries to upload the blob it cannot correctly calculate the actual blob size, so it does a partial upload to its object storage, which then results in a digest mismatch error (BlobDigestMismatchException). 
--- data/registry_model/registry_proxy_model.py | 12 +++++++++++- proxy/__init__.py | 9 +++++++-- proxy/test_proxy.py | 21 ++++++++++++++++++++- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/data/registry_model/registry_proxy_model.py b/data/registry_model/registry_proxy_model.py index b6e666390..9cfb36e6d 100644 --- a/data/registry_model/registry_proxy_model.py +++ b/data/registry_model/registry_proxy_model.py @@ -315,14 +315,24 @@ class ProxyModel(OCIModel): freshly out of the database, and a boolean indicating whether the returned tag was newly created or not. """ + upstream_manifest = None upstream_digest = self._proxy.manifest_exists(manifest_ref, ACCEPTED_MEDIA_TYPES) up_to_date = manifest.digest == upstream_digest + + # manifest_exists will return an empty/None digest when the upstream + # registry omits the docker-content-digest header. + if not upstream_digest: + upstream_manifest = self._pull_upstream_manifest(repo_ref.name, manifest_ref) + up_to_date = manifest.digest == upstream_manifest.digest + placeholder = manifest.internal_manifest_bytes.as_unicode() == "" if up_to_date and not placeholder: return tag, False + if upstream_manifest is None: + upstream_manifest = self._pull_upstream_manifest(repo_ref.name, manifest_ref) + self._enforce_repository_quota(repo_ref) - upstream_manifest = self._pull_upstream_manifest(repo_ref.name, manifest_ref) if up_to_date and placeholder: with db_disallow_replica_use(): with db_transaction(): diff --git a/proxy/__init__.py b/proxy/__init__.py index 776ba49d9..1c0e6790a 100644 --- a/proxy/__init__.py +++ b/proxy/__init__.py @@ -79,8 +79,10 @@ class Proxy: def manifest_exists(self, image_ref: str, media_types: list[str] | None = None) -> str | None: """ - Returns the manifest digest (docker-content-digest) if given by the - upstream registry. + Returns the manifest digest. + + Looks for the digest in the docker-content-digest header. 
If not + present in the response, parses the manifest then calculate the digest. If the manifest does not exist or the upstream registry errors, raises an UpstreamRegistryError exception. @@ -96,6 +98,9 @@ class Proxy: url = f"{self.base_url}/v2/{self._repo}/blobs/{digest}" resp = self.get( url, + headers={ + "Accept-Encoding": "identity", + }, allow_redirects=True, stream=True, ) diff --git a/proxy/test_proxy.py b/proxy/test_proxy.py index a2789a949..75294d972 100644 --- a/proxy/test_proxy.py +++ b/proxy/test_proxy.py @@ -18,6 +18,7 @@ ANONYMOUS_TOKEN = "anonymous-token" USER_TOKEN = "user-token" TAG = "14" TAG_404 = "666" +TAG_NO_DIGEST = "11" DIGEST = "sha256:2e7d2c03a9507ae265ecf5b5356885a53393a2029d241394997265a1a25aefc6" DIGEST_404 = "sha256:3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d" @@ -119,7 +120,11 @@ def docker_registry_manifest(url, request): } ], } - return response(200, content, request=request) + headers = { + "docker-content-digest": DIGEST, + "content-type": "application/vnd.docker.distribution.manifest.v2+json", + } + return response(200, content, headers, request=request) def docker_registry_manifest_404(url, request): @@ -135,6 +140,10 @@ def docker_registry_manifest_404(url, request): return response(404, content, request=request) +def docker_registry_manifest_no_digest(url, request): + return response(200, "", request=request) + + def docker_registry_blob(url, request): return response(200, request=request) @@ -160,6 +169,9 @@ def docker_registry_mock(url, request): elif url.path == f"/v2/library/postgres/manifests/{TAG_404}": return docker_registry_manifest_404(url, request) + elif url.path == f"/v2/library/postgres/manifests/{TAG_NO_DIGEST}": + return docker_registry_manifest_no_digest(url, request) + elif url.path == f"/v2/library/postgres/blobs/{DIGEST}": return docker_registry_blob(url, request) @@ -312,6 +324,7 @@ class TestProxy(unittest.TestCase): proxy = Proxy(self.config, "library/postgres") digest = 
proxy.manifest_exists(image_ref=TAG) self.assertNotEqual(digest, "") + self.assertNotEqual(digest, None) def test_manifest_exists_404(self): with HTTMock(docker_registry_mock): @@ -321,6 +334,12 @@ class TestProxy(unittest.TestCase): self.assertIn("404", str(excinfo.value)) + def test_manifest_exists_without_digest_header(self): + with HTTMock(docker_registry_mock): + proxy = Proxy(self.config, "library/postgres") + digest = proxy.manifest_exists(image_ref=TAG_NO_DIGEST) + self.assertIsNone(digest, None) + def test_get_blob(self): with HTTMock(docker_registry_mock): proxy = Proxy(self.config, "library/postgres")