
storage: Enable multipart upload for Google Cloud Storage (PROJQUAY-6862) (#3748)

* storage: Enable multipart upload for Google Cloud Storage (PROJQUAY-6862)
This PR removes the `_stream_write_internal` override that caused excessive memory consumption and falls back to the inherited `_CloudStorage` implementation, which uploads in chunks. Server-side assembly is still not supported by GCS, so we have to assemble everything locally. However, GCS does support the copy function, so a re-upload is not needed; a rough sketch of this flow follows the output below.

~~~
REPOSITORY                                        TAG         IMAGE ID      CREATED      SIZE
registry.fedoraproject.org/fedora                 latest      ecd9f7ee77f4  2 days ago   165 MB
quay.skynet/ibazulic/big-mirror-test              size138gb   8e6ba9ff13c0  3 days ago   148 GB
quay.skynet/quay-mirror/big-mirror-test           size138gb   8e6ba9ff13c0  3 days ago   148 GB
quay.skynet/ibazulic/mfs-image-test               latest      ab14f2230dd9  7 days ago   5.96 GB
quay.skynet/ibazulic/azure-storage-big-file-test  latest      ede194b926e0  7 days ago   16.1 GB
quay.skynet/ibazulic/minio/minio                  latest      76ed5b96833a  6 weeks ago  532 B

Getting image source signatures
Copying blob 9d9c3d76c421 done   |
Copying blob fce7cf3b093c skipped: already exists
Copying config 8e6ba9ff13 done   |
Writing manifest to image destination
~~~
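
To make the local-assembly-plus-copy behaviour concrete, here is a minimal, hedged sketch of that completion flow using a plain boto3 client against the GCS S3-interoperability endpoint. It is not the code from this PR; the helper name `assemble_chunks`, the placeholder credentials, and the in-memory buffering are illustrative assumptions only (a real implementation would spool to a temporary file rather than hold 100+ GB blobs in RAM).

~~~
# Hedged sketch only -- not Quay's implementation. Assumes a boto3 client talking to the
# GCS XML/S3-interoperability endpoint with HMAC credentials; assemble_chunks is hypothetical.
import io

import boto3

client = boto3.client(
    "s3",
    endpoint_url="https://storage.googleapis.com",
    aws_access_key_id="GOOG...placeholder...",
    aws_secret_access_key="...placeholder...",
)


def assemble_chunks(bucket, chunk_keys, final_key):
    """Turn the uploaded chunk objects into the final blob object."""
    if len(chunk_keys) == 1:
        # GCS supports server-side copy, so a single chunk never needs to be re-uploaded.
        client.copy_object(
            Bucket=bucket,
            Key=final_key,
            CopySource={"Bucket": bucket, "Key": chunk_keys[0]},
        )
        return

    # No server-side assembly of multiple chunks: concatenate locally, then upload once.
    combined = io.BytesIO()
    for key in chunk_keys:
        combined.write(client.get_object(Bucket=bucket, Key=key)["Body"].read())
    combined.seek(0)
    client.upload_fileobj(combined, bucket, final_key)
~~~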

For uploading extremely big layers, the default chunk size of 5 MiB is not enough. The PR therefore adds support for user-defined chunk sizes via `minimum_chunk_size_mb` and `maximum_chunk_size_mb`, which default to 5 MiB and 100 MiB respectively.
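
As a quick reference for the arithmetic (the constructor hunk below contains the actual code), the new setting is just a MiB value converted to bytes with a 5 MiB fallback; `resolve_minimum_chunk_size` here is a hypothetical standalone helper, not part of the PR:

~~~
# Standalone sketch of the conversion added to GoogleCloudStorage.__init__ below.
def resolve_minimum_chunk_size(minimum_chunk_size_mb=None):
    # None means the operator did not set the option; fall back to the 5 MiB S3 minimum part size.
    return (minimum_chunk_size_mb if minimum_chunk_size_mb is not None else 5) * 1024 * 1024


assert resolve_minimum_chunk_size() == 5 * 1024 * 1024        # default: 5 MiB
assert resolve_minimum_chunk_size(100) == 100 * 1024 * 1024   # e.g. 100 MiB for very large layers
~~~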

* Remove maximum_chunk_size_mb as it's not needed
Ivan Bazulic 2025-04-04 10:16:12 -04:00 committed by GitHub
parent c25be58e23
commit a6713a669d

~~~
@@ -911,6 +911,7 @@ class GoogleCloudStorage(_CloudStorage):
         secret_key,
         bucket_name,
         boto_timeout=60,
+        minimum_chunk_size_mb=None,
         signature_version=None,
     ):
         # GCS does not support ListObjectV2
@@ -935,6 +936,10 @@ class GoogleCloudStorage(_CloudStorage):
             secret_key,
         )
 
+        self.minimum_chunk_size = (
+            (minimum_chunk_size_mb if minimum_chunk_size_mb is not None else 5) * 1024 * 1024
+        )
+
         # Workaround for setting GCS cors at runtime with boto
         cors_xml = """<?xml version="1.0" encoding="UTF-8"?>
         <CorsConfig>
@@ -986,50 +991,6 @@ class GoogleCloudStorage(_CloudStorage):
             .replace("AWSAccessKeyId", "GoogleAccessId")
         )
 
-    def _stream_write_internal(
-        self,
-        path,
-        fp,
-        content_type=None,
-        content_encoding=None,
-        cancel_on_error=True,
-        size=filelike.READ_UNTIL_END,
-    ):
-        """
-        Writes the data found in the file-like stream to the given path, with optional limit on
-        size. Note that this method returns a *tuple* of (bytes_written, write_error) and should.
-        *not* raise an exception (such as IOError) if a problem uploading occurred. ALWAYS check
-        the returned tuple on calls to this method.
-        """
-        # Minimum size of upload part size on S3 is 5MB
-        self._initialize_cloud_conn()
-        path = self._init_path(path)
-        obj = self.get_cloud_bucket().Object(path)
-
-        extra_args = {}
-        if content_type is not None:
-            extra_args["ContentType"] = content_type
-
-        if content_encoding is not None:
-            extra_args["ContentEncoding"] = content_encoding
-
-        if size != filelike.READ_UNTIL_END:
-            fp = filelike.StreamSlice(fp, 0, size)
-
-        with BytesIO() as buf:
-            # Stage the bytes into the buffer for use with the multipart upload file API
-            bytes_staged = self.stream_write_to_fp(fp, buf, size)
-            buf.seek(0)
-
-            # TODO figure out how to handle cancel_on_error=False
-            try:
-                obj.put(Body=buf, **extra_args)
-            except Exception as ex:
-                return 0, ex
-
-        return bytes_staged, None
-
     def complete_chunked_upload(self, uuid, final_path, storage_metadata):
         self._initialize_cloud_conn()
~~~