diff --git a/app.py b/app.py index 689ce711b..07a85467d 100644 --- a/app.py +++ b/app.py @@ -66,7 +66,6 @@ from util.metrics.prometheus import PrometheusPlugin from util.repomirror.api import RepoMirrorAPI from util.tufmetadata.api import TUFMetadataAPI from util.security.instancekeys import InstanceKeys -from util.security.signing import Signer from util.greenlet_tracing import enable_tracing @@ -244,7 +243,6 @@ build_logs = BuildLogs(app) authentication = UserAuthentication(app, config_provider, OVERRIDE_CONFIG_DIRECTORY) userevents = UserEventsBuilderModule(app) superusers = SuperUserManager(app) -signer = Signer(app, config_provider) instance_keys = InstanceKeys(app) label_validator = LabelValidator(app) build_canceller = BuildCanceller(app) @@ -260,9 +258,6 @@ dockerfile_build_queue = WorkQueue( app.config["DOCKERFILE_BUILD_QUEUE_NAME"], tf, has_namespace=True ) notification_queue = WorkQueue(app.config["NOTIFICATION_QUEUE_NAME"], tf, has_namespace=True) -secscan_notification_queue = WorkQueue( - app.config["SECSCAN_NOTIFICATION_QUEUE_NAME"], tf, has_namespace=False -) export_action_logs_queue = WorkQueue( app.config["EXPORT_ACTION_LOGS_QUEUE_NAME"], tf, has_namespace=True ) @@ -277,7 +272,6 @@ all_queues = [ image_replication_queue, dockerfile_build_queue, notification_queue, - secscan_notification_queue, chunk_cleanup_queue, repository_gc_queue, namespace_gc_queue, @@ -315,10 +309,13 @@ model.config.store = storage model.config.register_repo_cleanup_callback(tuf_metadata_api.delete_metadata) secscan_model.configure(app, instance_keys, storage) -secscan_model.register_model_cleanup_callbacks(model.config) logs_model.configure(app.config) +# NOTE: We re-use the page token key here as this is just to obfuscate IDs for V1, and +# does not need to actually be secure. 
+registry_model.set_id_hash_salt(app.config.get("PAGE_TOKEN_KEY")) + @login_manager.user_loader def load_user(user_uuid): diff --git a/application.py b/application.py index 1a0c799fa..0a1215c0f 100644 --- a/application.py +++ b/application.py @@ -13,7 +13,6 @@ from app import app as application # Bind all of the blueprints import web -import verbs import registry import secscan diff --git a/conf/init/supervisord_conf_create.py b/conf/init/supervisord_conf_create.py index 8b1ab1ac9..9a9f12158 100644 --- a/conf/init/supervisord_conf_create.py +++ b/conf/init/supervisord_conf_create.py @@ -29,14 +29,12 @@ def default_services(): "notificationworker": {"autostart": "true"}, "queuecleanupworker": {"autostart": "true"}, "repositoryactioncounter": {"autostart": "true"}, - "security_notification_worker": {"autostart": "true"}, "securityworker": {"autostart": "true"}, "storagereplication": {"autostart": "true"}, "teamsyncworker": {"autostart": "true"}, "dnsmasq": {"autostart": "true"}, "gunicorn-registry": {"autostart": "true"}, "gunicorn-secscan": {"autostart": "true"}, - "gunicorn-verbs": {"autostart": "true"}, "gunicorn-web": {"autostart": "true"}, "ip-resolver-update-worker": {"autostart": "true"}, "jwtproxy": {"autostart": "true"}, @@ -45,6 +43,7 @@ def default_services(): "pushgateway": {"autostart": "true"}, "servicekey": {"autostart": "true"}, "repomirrorworker": {"autostart": "false"}, + "backfillmanifestworker": {"autostart": "false"}, } diff --git a/conf/nginx/http-base.conf b/conf/nginx/http-base.conf index 7b4f0ef28..70399c9ad 100644 --- a/conf/nginx/http-base.conf +++ b/conf/nginx/http-base.conf @@ -49,9 +49,6 @@ upstream web_app_server { upstream jwtproxy_secscan { server unix:/tmp/jwtproxy_secscan.sock fail_timeout=0; } -upstream verbs_app_server { - server unix:/tmp/gunicorn_verbs.sock fail_timeout=0; -} upstream registry_app_server { server unix:/tmp/gunicorn_registry.sock fail_timeout=0; } diff --git a/conf/nginx/server-base.conf.jnj b/conf/nginx/server-base.conf.jnj index e01a47a5f..2e411bf8a 100644 --- a/conf/nginx/server-base.conf.jnj +++ b/conf/nginx/server-base.conf.jnj @@ -306,19 +306,6 @@ location = /v1/_ping { return 200 'true'; } -location /c1/ { - proxy_buffering off; - - proxy_request_buffering off; - - proxy_pass http://verbs_app_server; - proxy_temp_path /tmp 1 2; - - {% if enable_rate_limits %} - limit_req zone=staticauth burst=5 nodelay; - {% endif %} -} - location /static/ { # checks for static file, if not found proxy to app alias {{static_dir}}/; diff --git a/conf/supervisord.conf.jnj b/conf/supervisord.conf.jnj index edb2d1ee1..50edf8fc6 100644 --- a/conf/supervisord.conf.jnj +++ b/conf/supervisord.conf.jnj @@ -138,14 +138,6 @@ autostart = {{ config['repositoryactioncounter']['autostart'] }} stdout_events_enabled = true stderr_events_enabled = true -[program:security_notification_worker] -environment= - PYTHONPATH=%(ENV_QUAYDIR)s -command=python -m workers.security_notification_worker -autostart = {{ config['security_notification_worker']['autostart'] }} -stdout_events_enabled = true -stderr_events_enabled = true - [program:securityworker] environment= PYTHONPATH=%(ENV_QUAYDIR)s @@ -194,14 +186,6 @@ autostart = {{ config['gunicorn-secscan']['autostart'] }} stdout_events_enabled = true stderr_events_enabled = true -[program:gunicorn-verbs] -environment= - PYTHONPATH=%(ENV_QUAYDIR)s -command=nice -n 10 gunicorn -c %(ENV_QUAYCONF)s/gunicorn_verbs.py verbs:application -autostart = {{ config['gunicorn-verbs']['autostart'] }} -stdout_events_enabled = true 
-stderr_events_enabled = true - [program:gunicorn-web] environment= PYTHONPATH=%(ENV_QUAYDIR)s diff --git a/config.py b/config.py index 1e1c30ba2..82486ac89 100644 --- a/config.py +++ b/config.py @@ -259,7 +259,6 @@ class DefaultConfig(ImmutableConfig): NOTIFICATION_QUEUE_NAME = "notification" DOCKERFILE_BUILD_QUEUE_NAME = "dockerfilebuild" REPLICATION_QUEUE_NAME = "imagestoragereplication" - SECSCAN_NOTIFICATION_QUEUE_NAME = "security_notification" CHUNK_CLEANUP_QUEUE_NAME = "chunk_cleanup" NAMESPACE_GC_QUEUE_NAME = "namespacegc" REPOSITORY_GC_QUEUE_NAME = "repositorygc" @@ -476,9 +475,6 @@ class DefaultConfig(ImmutableConfig): # The version of the API to use for the security scanner. SECURITY_SCANNER_API_VERSION = "v1" - # Namespace whitelist for security scanner. - SECURITY_SCANNER_V4_NAMESPACE_WHITELIST = [] - # Minimum number of seconds before re-indexing a manifest with the security scanner. SECURITY_SCANNER_V4_REINDEX_THRESHOLD = 300 @@ -739,3 +735,6 @@ class DefaultConfig(ImmutableConfig): # Feature Flag: Whether the repository action count worker is enabled. FEATURE_REPOSITORY_ACTION_COUNTER = True + + # TEMP FEATURE: Backfill the sizes of manifests. + FEATURE_MANIFEST_SIZE_BACKFILL = True diff --git a/config_app/js/core-config-setup/core-config-setup.js b/config_app/js/core-config-setup/core-config-setup.js index 67d27cc45..dafe0eb2d 100644 --- a/config_app/js/core-config-setup/core-config-setup.js +++ b/config_app/js/core-config-setup/core-config-setup.js @@ -74,10 +74,6 @@ angular.module("quay-config") return config.AUTHENTICATION_TYPE == 'AppToken'; }}, - {'id': 'signer', 'title': 'ACI Signing', 'condition': function(config) { - return config.FEATURE_ACI_CONVERSION; - }}, - {'id': 'github-login', 'title': 'Github (Enterprise) Authentication', 'condition': function(config) { return config.FEATURE_GITHUB_LOGIN; }}, diff --git a/data/database.py b/data/database.py index d522e71d7..c56863a7b 100644 --- a/data/database.py +++ b/data/database.py @@ -685,6 +685,7 @@ class User(BaseModel): NamespaceGeoRestriction, ManifestSecurityStatus, RepoMirrorConfig, + UploadedBlob, } | appr_classes | v22_classes @@ -888,6 +889,7 @@ class Repository(BaseModel): RepoMirrorRule, DeletedRepository, ManifestSecurityStatus, + UploadedBlob, } | appr_classes | v22_classes @@ -1115,6 +1117,7 @@ class Image(BaseModel): return list(map(int, self.ancestors.split("/")[1:-1])) +@deprecated_model class DerivedStorageForImage(BaseModel): source_image = ForeignKeyField(Image) derivative = ForeignKeyField(ImageStorage) @@ -1127,6 +1130,7 @@ class DerivedStorageForImage(BaseModel): indexes = ((("source_image", "transformation", "uniqueness_hash"), True),) +@deprecated_model class RepositoryTag(BaseModel): name = CharField() image = ForeignKeyField(Image) @@ -1391,8 +1395,8 @@ class ExternalNotificationMethod(BaseModel): class RepositoryNotification(BaseModel): uuid = CharField(default=uuid_generator, index=True) repository = ForeignKeyField(Repository) - event = ForeignKeyField(ExternalNotificationEvent) - method = ForeignKeyField(ExternalNotificationMethod) + event = EnumField(ExternalNotificationEvent) + method = EnumField(ExternalNotificationMethod) title = CharField(null=True) config_json = TextField() event_config_json = TextField(default="{}") @@ -1414,6 +1418,19 @@ class RepositoryAuthorizedEmail(BaseModel): ) +class UploadedBlob(BaseModel): + """ + UploadedBlob tracks a recently uploaded blob and prevents it from being GCed + while within the expiration window. 
+ """ + + id = BigAutoField() + repository = ForeignKeyField(Repository) + blob = ForeignKeyField(ImageStorage) + uploaded_at = DateTimeField(default=datetime.utcnow) + expires_at = DateTimeField(index=True) + + class BlobUpload(BaseModel): repository = ForeignKeyField(Repository) uuid = CharField(index=True, unique=True) @@ -1699,12 +1716,16 @@ class Manifest(BaseModel): media_type = EnumField(MediaType) manifest_bytes = TextField() + config_media_type = CharField(null=True) + layers_compressed_size = BigIntegerField(null=True) + class Meta: database = db read_only_config = read_only_config indexes = ( (("repository", "digest"), True), (("repository", "media_type"), False), + (("repository", "config_media_type"), False), ) diff --git a/data/migrations/dba_operator/3383aad1e992-databasemigration.yaml b/data/migrations/dba_operator/3383aad1e992-databasemigration.yaml new file mode 100644 index 000000000..df2173233 --- /dev/null +++ b/data/migrations/dba_operator/3383aad1e992-databasemigration.yaml @@ -0,0 +1,51 @@ + +--- +apiVersion: dbaoperator.app-sre.redhat.com/v1alpha1 +kind: DatabaseMigration +metadata: + name: 3383aad1e992 +spec: + migrationContainerSpec: + command: + - /quay-registry/quay-entrypoint.sh + - migrate + - 3383aad1e992 + image: quay.io/quay/quay + name: 3383aad1e992 + previous: !!python/tuple + - 04b9d2191450 + schemaHints: + - columns: + - name: id + nullable: false + - name: repository_id + nullable: false + - name: blob_id + nullable: false + - name: uploaded_at + nullable: false + - name: expires_at + nullable: false + operation: createTable + table: uploadedblob + - columns: + - name: blob_id + nullable: false + indexName: uploadedblob_blob_id + indexType: index + operation: createIndex + table: uploadedblob + - columns: + - name: expires_at + nullable: false + indexName: uploadedblob_expires_at + indexType: index + operation: createIndex + table: uploadedblob + - columns: + - name: repository_id + nullable: false + indexName: uploadedblob_repository_id + indexType: index + operation: createIndex + table: uploadedblob diff --git a/data/migrations/dba_operator/88e64904d000-databasemigration.yaml b/data/migrations/dba_operator/88e64904d000-databasemigration.yaml new file mode 100644 index 000000000..36d628462 --- /dev/null +++ b/data/migrations/dba_operator/88e64904d000-databasemigration.yaml @@ -0,0 +1,36 @@ + +--- +apiVersion: dbaoperator.app-sre.redhat.com/v1alpha1 +kind: DatabaseMigration +metadata: + name: 88e64904d000 +spec: + migrationContainerSpec: + command: + - /quay-registry/quay-entrypoint.sh + - migrate + - 88e64904d000 + image: quay.io/quay/quay + name: 88e64904d000 + previous: !!python/tuple + - 3383aad1e992 + schemaHints: + - columns: + - name: config_media_type + nullable: true + operation: addColumn + table: manifest + - columns: + - name: layers_compressed_size + nullable: true + operation: addColumn + table: manifest + - columns: + - name: repository_id + nullable: false + - name: config_media_type + nullable: true + indexName: manifest_repository_id_config_media_type + indexType: index + operation: createIndex + table: manifest diff --git a/data/migrations/versions/3383aad1e992_add_uploadedblob_table.py b/data/migrations/versions/3383aad1e992_add_uploadedblob_table.py new file mode 100644 index 000000000..3ebf2bce6 --- /dev/null +++ b/data/migrations/versions/3383aad1e992_add_uploadedblob_table.py @@ -0,0 +1,57 @@ +"""Add UploadedBlob table + +Revision ID: 3383aad1e992 +Revises: 04b9d2191450 +Create Date: 2020-04-21 11:45:54.837077 + +""" + +# 
revision identifiers, used by Alembic. +revision = "3383aad1e992" +down_revision = "04b9d2191450" + +import sqlalchemy as sa +from sqlalchemy.dialects import mysql + + +def upgrade(op, tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "uploadedblob", + sa.Column("id", sa.BigInteger(), nullable=False), + sa.Column("repository_id", sa.Integer(), nullable=False), + sa.Column("blob_id", sa.Integer(), nullable=False), + sa.Column("uploaded_at", sa.DateTime(), nullable=False), + sa.Column("expires_at", sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint( + ["blob_id"], ["imagestorage.id"], name=op.f("fk_uploadedblob_blob_id_imagestorage") + ), + sa.ForeignKeyConstraint( + ["repository_id"], + ["repository.id"], + name=op.f("fk_uploadedblob_repository_id_repository"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_uploadedblob")), + ) + op.create_index("uploadedblob_blob_id", "uploadedblob", ["blob_id"], unique=False) + op.create_index("uploadedblob_expires_at", "uploadedblob", ["expires_at"], unique=False) + op.create_index("uploadedblob_repository_id", "uploadedblob", ["repository_id"], unique=False) + # ### end Alembic commands ### + + # ### population of test data ### # + tester.populate_table( + "uploadedblob", + [ + ("repository_id", tester.TestDataType.Foreign("repository")), + ("blob_id", tester.TestDataType.Foreign("imagestorage")), + ("uploaded_at", tester.TestDataType.DateTime), + ("expires_at", tester.TestDataType.DateTime), + ], + ) + # ### end population of test data ### # + + +def downgrade(op, tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("uploadedblob") + # ### end Alembic commands ### diff --git a/data/migrations/versions/88e64904d000_add_new_metadata_columns_to_manifest_.py b/data/migrations/versions/88e64904d000_add_new_metadata_columns_to_manifest_.py new file mode 100644 index 000000000..3138289c1 --- /dev/null +++ b/data/migrations/versions/88e64904d000_add_new_metadata_columns_to_manifest_.py @@ -0,0 +1,39 @@ +"""Add new metadata columns to Manifest table + +Revision ID: 88e64904d000 +Revises: 3383aad1e992 +Create Date: 2020-04-21 14:00:50.376517 + +""" + +# revision identifiers, used by Alembic. +revision = "88e64904d000" +down_revision = "3383aad1e992" + +import sqlalchemy as sa + + +def upgrade(op, tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("manifest", sa.Column("config_media_type", sa.String(length=255), nullable=True)) + op.add_column("manifest", sa.Column("layers_compressed_size", sa.BigInteger(), nullable=True)) + op.create_index( + "manifest_repository_id_config_media_type", + "manifest", + ["repository_id", "config_media_type"], + unique=False, + ) + # ### end Alembic commands ### + + # ### population of test data ### # + tester.populate_column("manifest", "config_media_type", tester.TestDataType.String) + tester.populate_column("manifest", "layers_compressed_size", tester.TestDataType.Integer) + # ### end population of test data ### # + + +def downgrade(op, tables, tester): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_index("manifest_repository_id_config_media_type", table_name="manifest") + op.drop_column("manifest", "layers_compressed_size") + op.drop_column("manifest", "config_media_type") + # ### end Alembic commands ### diff --git a/data/model/blob.py b/data/model/blob.py index 53a1816c8..70ee09793 100644 --- a/data/model/blob.py +++ b/data/model/blob.py @@ -1,6 +1,6 @@ import logging -from datetime import datetime +from datetime import datetime, timedelta from uuid import uuid4 from data.model import ( @@ -14,11 +14,13 @@ from data.model import ( ) from data.database import ( Repository, + RepositoryState, Namespace, ImageStorage, Image, ImageStoragePlacement, BlobUpload, + UploadedBlob, ImageStorageLocation, db_random_func, ) @@ -27,53 +29,6 @@ from data.database import ( logger = logging.getLogger(__name__) -def get_repository_blob_by_digest(repository, blob_digest): - """ - Find the content-addressable blob linked to the specified repository. - """ - assert blob_digest - try: - storage = ( - ImageStorage.select(ImageStorage.uuid) - .join(Image) - .where( - Image.repository == repository, - ImageStorage.content_checksum == blob_digest, - ImageStorage.uploading == False, - ) - .get() - ) - - return storage_model.get_storage_by_uuid(storage.uuid) - except (ImageStorage.DoesNotExist, InvalidImageException): - raise BlobDoesNotExist("Blob does not exist with digest: {0}".format(blob_digest)) - - -def get_repo_blob_by_digest(namespace, repo_name, blob_digest): - """ - Find the content-addressable blob linked to the specified repository. - """ - assert blob_digest - try: - storage = ( - ImageStorage.select(ImageStorage.uuid) - .join(Image) - .join(Repository) - .join(Namespace, on=(Namespace.id == Repository.namespace_user)) - .where( - Repository.name == repo_name, - Namespace.username == namespace, - ImageStorage.content_checksum == blob_digest, - ImageStorage.uploading == False, - ) - .get() - ) - - return storage_model.get_storage_by_uuid(storage.uuid) - except (ImageStorage.DoesNotExist, InvalidImageException): - raise BlobDoesNotExist("Blob does not exist with digest: {0}".format(blob_digest)) - - def store_blob_record_and_temp_link( namespace, repo_name, @@ -157,16 +112,26 @@ def temp_link_blob(repository_id, blob_digest, link_expiration_s): def _temp_link_blob(repository_id, storage, link_expiration_s): """ Note: Should *always* be called by a parent under a transaction. """ - random_image_name = str(uuid4()) + try: + repository = Repository.get(id=repository_id) + except Repository.DoesNotExist: + return None - # Create a temporary link into the repository, to be replaced by the v1 metadata later - # and create a temporary tag to reference it - image = Image.create( - storage=storage, docker_image_id=random_image_name, repository=repository_id + if repository.state == RepositoryState.MARKED_FOR_DELETION: + return None + + return UploadedBlob.create( + repository=repository_id, + blob=storage, + expires_at=datetime.utcnow() + timedelta(seconds=link_expiration_s), + ) + + +def lookup_expired_uploaded_blobs(repository): + """ Looks up all expired uploaded blobs in a repository. 
""" + return UploadedBlob.select().where( + UploadedBlob.repository == repository, UploadedBlob.expires_at <= datetime.utcnow() ) - temp_tag = tag.create_temporary_hidden_tag(repository_id, image, link_expiration_s) - if temp_tag is None: - image.delete_instance() def get_stale_blob_upload(stale_timespan): @@ -192,7 +157,12 @@ def get_blob_upload_by_uuid(upload_uuid): Loads the upload with the given UUID, if any. """ try: - return BlobUpload.select().where(BlobUpload.uuid == upload_uuid).get() + return ( + BlobUpload.select(BlobUpload, ImageStorageLocation) + .join(ImageStorageLocation) + .where(BlobUpload.uuid == upload_uuid) + .get() + ) except BlobUpload.DoesNotExist: return None diff --git a/data/model/gc.py b/data/model/gc.py index f459bedc4..d4f316fe0 100644 --- a/data/model/gc.py +++ b/data/model/gc.py @@ -1,8 +1,9 @@ import logging from peewee import fn, IntegrityError +from datetime import datetime -from data.model import config, db_transaction, storage, _basequery, tag as pre_oci_tag +from data.model import config, db_transaction, storage, _basequery, tag as pre_oci_tag, blob from data.model.oci import tag as oci_tag from data.database import Repository, db_for_update from data.database import ApprTag @@ -28,8 +29,14 @@ from data.database import ( RepoMirrorConfig, RepositoryPermission, RepositoryAuthorizedEmail, + UploadedBlob, +) +from data.database import ( + RepositoryTag, + TagManifest, + Image, + DerivedStorageForImage, ) -from data.database import RepositoryTag, TagManifest, Image, DerivedStorageForImage from data.database import TagManifestToManifest, TagToRepositoryTag, TagManifestLabelMap logger = logging.getLogger(__name__) @@ -98,6 +105,7 @@ def purge_repository(repo, force=False): assert RepositoryTag.select().where(RepositoryTag.repository == repo).count() == 0 assert Manifest.select().where(Manifest.repository == repo).count() == 0 assert ManifestBlob.select().where(ManifestBlob.repository == repo).count() == 0 + assert UploadedBlob.select().where(UploadedBlob.repository == repo).count() == 0 assert ( ManifestSecurityStatus.select().where(ManifestSecurityStatus.repository == repo).count() == 0 @@ -194,7 +202,27 @@ def _purge_repository_contents(repo): if not found: break - # TODO: remove this once we're fully on the OCI data model. + # Purge any uploaded blobs that have expired. + while True: + found = False + for uploaded_blobs in _chunk_iterate_for_deletion( + UploadedBlob.select().where(UploadedBlob.repository == repo) + ): + logger.debug( + "Found %s uploaded blobs to GC under repository %s", len(uploaded_blobs), repo + ) + found = True + context = _GarbageCollectorContext(repo) + for uploaded_blob in uploaded_blobs: + logger.debug("Deleting uploaded blob %s under repository %s", uploaded_blob, repo) + assert uploaded_blob.repository_id == repo.id + _purge_uploaded_blob(uploaded_blob, context, allow_non_expired=True) + + if not found: + break + + # TODO: remove this once we've removed the foreign key constraints from RepositoryTag + # and Image. 
while True: found = False repo_tag_query = RepositoryTag.select().where(RepositoryTag.repository == repo) @@ -217,6 +245,7 @@ def _purge_repository_contents(repo): assert RepositoryTag.select().where(RepositoryTag.repository == repo).count() == 0 assert Manifest.select().where(Manifest.repository == repo).count() == 0 assert ManifestBlob.select().where(ManifestBlob.repository == repo).count() == 0 + assert UploadedBlob.select().where(UploadedBlob.repository == repo).count() == 0 # Add all remaining images to a new context. We do this here to minimize the number of images # we need to load. @@ -259,6 +288,7 @@ def garbage_collect_repo(repo): _run_garbage_collection(context) had_changes = True + # TODO: Remove once we've removed the foreign key constraints from RepositoryTag and Image. for tags in _chunk_iterate_for_deletion(pre_oci_tag.lookup_unrecoverable_tags(repo)): logger.debug("Found %s tags to GC under repository %s", len(tags), repo) context = _GarbageCollectorContext(repo) @@ -271,6 +301,18 @@ def garbage_collect_repo(repo): _run_garbage_collection(context) had_changes = True + # Purge expired uploaded blobs. + for uploaded_blobs in _chunk_iterate_for_deletion(blob.lookup_expired_uploaded_blobs(repo)): + logger.debug("Found %s uploaded blobs to GC under repository %s", len(uploaded_blobs), repo) + context = _GarbageCollectorContext(repo) + for uploaded_blob in uploaded_blobs: + logger.debug("Deleting uploaded blob %s under repository %s", uploaded_blob, repo) + assert uploaded_blob.repository_id == repo.id + _purge_uploaded_blob(uploaded_blob, context) + + _run_garbage_collection(context) + had_changes = True + return had_changes @@ -376,6 +418,16 @@ def _purge_pre_oci_tag(tag, context, allow_non_expired=False): reloaded_tag.delete_instance() +def _purge_uploaded_blob(uploaded_blob, context, allow_non_expired=False): + assert allow_non_expired or uploaded_blob.expires_at <= datetime.utcnow() + + # Add the storage to be checked. + context.add_blob_id(uploaded_blob.blob_id) + + # Delete the uploaded blob. 
+ uploaded_blob.delete_instance() + + def _check_manifest_used(manifest_id): assert manifest_id is not None diff --git a/data/model/image.py b/data/model/image.py index 2a6f8115e..40c5931bb 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -23,13 +23,10 @@ from data.database import ( ImageStorage, ImageStorageLocation, RepositoryPermission, - DerivedStorageForImage, ImageStorageTransformation, User, ) -from util.canonicaljson import canonicalize - logger = logging.getLogger(__name__) @@ -554,62 +551,3 @@ def set_secscan_status(image, indexed, version): .where((Image.security_indexed_engine != version) | (Image.security_indexed != indexed)) .execute() ) != 0 - - -def _get_uniqueness_hash(varying_metadata): - if not varying_metadata: - return None - - return hashlib.sha256(json.dumps(canonicalize(varying_metadata)).encode("utf-8")).hexdigest() - - -def find_or_create_derived_storage( - source_image, transformation_name, preferred_location, varying_metadata=None -): - existing = find_derived_storage_for_image(source_image, transformation_name, varying_metadata) - if existing is not None: - return existing - - uniqueness_hash = _get_uniqueness_hash(varying_metadata) - trans = ImageStorageTransformation.get(name=transformation_name) - new_storage = storage.create_v1_storage(preferred_location) - - try: - derived = DerivedStorageForImage.create( - source_image=source_image, - derivative=new_storage, - transformation=trans, - uniqueness_hash=uniqueness_hash, - ) - except IntegrityError: - # Storage was created while this method executed. Just return the existing. - ImageStoragePlacement.delete().where(ImageStoragePlacement.storage == new_storage).execute() - new_storage.delete_instance() - return find_derived_storage_for_image(source_image, transformation_name, varying_metadata) - - return derived - - -def find_derived_storage_for_image(source_image, transformation_name, varying_metadata=None): - uniqueness_hash = _get_uniqueness_hash(varying_metadata) - - try: - found = ( - DerivedStorageForImage.select(ImageStorage, DerivedStorageForImage) - .join(ImageStorage) - .switch(DerivedStorageForImage) - .join(ImageStorageTransformation) - .where( - DerivedStorageForImage.source_image == source_image, - ImageStorageTransformation.name == transformation_name, - DerivedStorageForImage.uniqueness_hash == uniqueness_hash, - ) - .get() - ) - return found - except DerivedStorageForImage.DoesNotExist: - return None - - -def delete_derived_storage(derived_storage): - derived_storage.derivative.delete_instance(recursive=True) diff --git a/data/model/oauth.py b/data/model/oauth.py index faec5ec5c..62808ea10 100644 --- a/data/model/oauth.py +++ b/data/model/oauth.py @@ -352,8 +352,13 @@ def lookup_access_token_by_uuid(token_uuid): def lookup_access_token_for_user(user_obj, token_uuid): try: - return OAuthAccessToken.get( - OAuthAccessToken.authorized_user == user_obj, OAuthAccessToken.uuid == token_uuid + return ( + OAuthAccessToken.select(OAuthAccessToken, User) + .join(User) + .where( + OAuthAccessToken.authorized_user == user_obj, OAuthAccessToken.uuid == token_uuid + ) + .get() ) except OAuthAccessToken.DoesNotExist: return None diff --git a/data/model/oci/blob.py b/data/model/oci/blob.py index 2d4f789ba..5ed9796aa 100644 --- a/data/model/oci/blob.py +++ b/data/model/oci/blob.py @@ -1,7 +1,6 @@ -from data.database import ImageStorage, ManifestBlob +from data.database import ImageStorage, ManifestBlob, UploadedBlob from data.model import BlobDoesNotExist from data.model.storage import 
get_storage_by_uuid, InvalidImageException -from data.model.blob import get_repository_blob_by_digest as legacy_get def get_repository_blob_by_digest(repository, blob_digest): @@ -9,8 +8,34 @@ def get_repository_blob_by_digest(repository, blob_digest): Find the content-addressable blob linked to the specified repository and returns it or None if none. """ + # First try looking for a recently uploaded blob. If none found that is matching, + # check the repository itself. + storage = _lookup_blob_uploaded(repository, blob_digest) + if storage is None: + storage = _lookup_blob_in_repository(repository, blob_digest) + + return get_storage_by_uuid(storage.uuid) if storage is not None else None + + +def _lookup_blob_uploaded(repository, blob_digest): try: - storage = ( + return ( + ImageStorage.select(ImageStorage.uuid) + .join(UploadedBlob) + .where( + UploadedBlob.repository == repository, + ImageStorage.content_checksum == blob_digest, + ImageStorage.uploading == False, + ) + .get() + ) + except ImageStorage.DoesNotExist: + return None + + +def _lookup_blob_in_repository(repository, blob_digest): + try: + return ( ImageStorage.select(ImageStorage.uuid) .join(ManifestBlob) .where( @@ -20,12 +45,5 @@ def get_repository_blob_by_digest(repository, blob_digest): ) .get() ) - - return get_storage_by_uuid(storage.uuid) - except (ImageStorage.DoesNotExist, InvalidImageException): - # TODO: Remove once we are no longer using the legacy tables. - # Try the legacy call. - try: - return legacy_get(repository, blob_digest) - except BlobDoesNotExist: - return None + except ImageStorage.DoesNotExist: + return None diff --git a/data/model/oci/manifest.py b/data/model/oci/manifest.py index de99b1bf5..d8c2797f4 100644 --- a/data/model/oci/manifest.py +++ b/data/model/oci/manifest.py @@ -1,4 +1,6 @@ +import json import logging +import os from collections import namedtuple @@ -10,6 +12,10 @@ from data.database import ( ManifestBlob, ManifestLegacyImage, ManifestChild, + ImageStorage, + ImageStoragePlacement, + ImageStorageTransformation, + ImageStorageSignature, db_transaction, ) from data.model import BlobDoesNotExist @@ -17,11 +23,12 @@ from data.model.blob import get_or_create_shared_blob, get_shared_blob from data.model.oci.tag import filter_to_alive_tags, create_temporary_tag_if_necessary from data.model.oci.label import create_manifest_label from data.model.oci.retriever import RepositoryContentRetriever -from data.model.storage import lookup_repo_storages_by_content_checksum +from data.model.storage import lookup_repo_storages_by_content_checksum, create_v1_storage from data.model.image import lookup_repository_images, get_image, synthesize_v1_image from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES from image.docker.schema1 import ManifestException from image.docker.schema2.list import MalformedSchema2ManifestList +from util.canonicaljson import canonicalize from util.validation import is_json @@ -206,91 +213,17 @@ def _create_manifest( child_manifest_rows[child_manifest_info.manifest.digest] = child_manifest_info.manifest child_manifest_label_dicts.append(labels) - # Ensure all the blobs in the manifest exist. - digests = set(manifest_interface_instance.local_blob_digests) - blob_map = {} - - # If the special empty layer is required, simply load it directly. This is much faster - # than trying to load it on a per repository basis, and that is unnecessary anyway since - # this layer is predefined. 
- if EMPTY_LAYER_BLOB_DIGEST in digests: - digests.remove(EMPTY_LAYER_BLOB_DIGEST) - blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST) - if not blob_map[EMPTY_LAYER_BLOB_DIGEST]: - if raise_on_error: - raise CreateManifestException("Unable to retrieve specialized empty blob") - - logger.warning("Could not find the special empty blob in storage") - return None - - if digests: - query = lookup_repo_storages_by_content_checksum(repository_id, digests) - blob_map.update({s.content_checksum: s for s in query}) - for digest_str in digests: - if digest_str not in blob_map: - logger.warning( - "Unknown blob `%s` under manifest `%s` for repository `%s`", - digest_str, - manifest_interface_instance.digest, - repository_id, - ) - - if raise_on_error: - raise CreateManifestException("Unknown blob `%s`" % digest_str) - - return None - - # Special check: If the empty layer blob is needed for this manifest, add it to the - # blob map. This is necessary because Docker decided to elide sending of this special - # empty layer in schema version 2, but we need to have it referenced for GC and schema version 1. - if EMPTY_LAYER_BLOB_DIGEST not in blob_map: - try: - requires_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob( - retriever - ) - except ManifestException as ex: - if raise_on_error: - raise CreateManifestException(str(ex)) - - return None - - if requires_empty_layer is None: - if raise_on_error: - raise CreateManifestException("Could not load configuration blob") - - return None - - if requires_empty_layer: - shared_blob = get_or_create_shared_blob( - EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage - ) - assert not shared_blob.uploading - assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST - blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob - - # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy - # image. - legacy_image = None - if manifest_interface_instance.has_legacy_image: - try: - legacy_image_id = _populate_legacy_image( - repository_id, manifest_interface_instance, blob_map, retriever, raise_on_error - ) - except ManifestException as me: - logger.error("Got manifest error when populating legacy images: %s", me) - if raise_on_error: - raise CreateManifestException( - "Attempt to create an invalid manifest: %s. Please report this issue." % me - ) - - return None - - if legacy_image_id is None: - return None - - legacy_image = get_image(repository_id, legacy_image_id) - if legacy_image is None: - return None + # Build the map from required blob digests to the blob objects. + blob_map = _build_blob_map( + repository_id, + manifest_interface_instance, + retriever, + storage, + raise_on_error, + require_empty_layer=False, + ) + if blob_map is None: + return None # Create the manifest and its blobs. media_type = Manifest.media_type.get_id(manifest_interface_instance.media_type) @@ -314,6 +247,8 @@ def _create_manifest( digest=manifest_interface_instance.digest, media_type=media_type, manifest_bytes=manifest_interface_instance.bytes.as_encoded_str(), + config_media_type=manifest_interface_instance.config_media_type, + layers_compressed_size=manifest_interface_instance.layers_compressed_size, ) except IntegrityError as ie: try: @@ -339,12 +274,6 @@ def _create_manifest( if blobs_to_insert: ManifestBlob.insert_many(blobs_to_insert).execute() - # Set the legacy image (if applicable). 
- if legacy_image is not None: - ManifestLegacyImage.create( - repository=repository_id, image=legacy_image, manifest=manifest - ) - # Insert the manifest child rows (if applicable). if child_manifest_rows: children_to_insert = [ @@ -392,6 +321,131 @@ def _create_manifest( return CreatedManifest(manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply) +def _build_blob_map( + repository_id, + manifest_interface_instance, + retriever, + storage, + raise_on_error=False, + require_empty_layer=True, +): + """ Builds a map containing the digest of each blob referenced by the given manifest, + to its associated Blob row in the database. This method also verifies that the blob + is accessible under the given repository. Returns None on error (unless raise_on_error + is specified). If require_empty_layer is set to True, the method will check if the manifest + references the special shared empty layer blob and, if so, add it to the map. Otherwise, + the empty layer blob is only returned if it was *explicitly* referenced in the manifest. + This is necessary because Docker V2_2/OCI manifests can implicitly reference an empty blob + layer for image layers that only change metadata. + """ + + # Ensure all the blobs in the manifest exist. + digests = set(manifest_interface_instance.local_blob_digests) + blob_map = {} + + # If the special empty layer is required, simply load it directly. This is much faster + # than trying to load it on a per repository basis, and that is unnecessary anyway since + # this layer is predefined. + if EMPTY_LAYER_BLOB_DIGEST in digests: + digests.remove(EMPTY_LAYER_BLOB_DIGEST) + blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST) + if not blob_map[EMPTY_LAYER_BLOB_DIGEST]: + if raise_on_error: + raise CreateManifestException("Unable to retrieve specialized empty blob") + + logger.warning("Could not find the special empty blob in storage") + return None + + if digests: + query = lookup_repo_storages_by_content_checksum(repository_id, digests, with_uploads=True) + blob_map.update({s.content_checksum: s for s in query}) + for digest_str in digests: + if digest_str not in blob_map: + logger.warning( + "Unknown blob `%s` under manifest `%s` for repository `%s`", + digest_str, + manifest_interface_instance.digest, + repository_id, + ) + + if raise_on_error: + raise CreateManifestException("Unknown blob `%s`" % digest_str) + + return None + + # Special check: If the empty layer blob is needed for this manifest, add it to the + # blob map. This is necessary because Docker decided to elide sending of this special + # empty layer in schema version 2, but we need to have it referenced for schema version 1. 
+ if require_empty_layer and EMPTY_LAYER_BLOB_DIGEST not in blob_map: + try: + requires_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob( + retriever + ) + except ManifestException as ex: + if raise_on_error: + raise CreateManifestException(str(ex)) + + return None + + if requires_empty_layer is None: + if raise_on_error: + raise CreateManifestException("Could not load configuration blob") + + return None + + if requires_empty_layer: + shared_blob = get_or_create_shared_blob( + EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage + ) + assert not shared_blob.uploading + assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST + blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob + + return blob_map + + +def populate_legacy_images_for_testing(manifest, manifest_interface_instance, storage): + """ Populates the legacy image rows for the given manifest. """ + # NOTE: This method is only kept around for use by legacy tests that still require + # legacy images. As a result, we make sure we're in testing mode before we run. + assert os.getenv("TEST") == "true" + + repository_id = manifest.repository_id + retriever = RepositoryContentRetriever.for_repository(repository_id, storage) + + blob_map = _build_blob_map( + repository_id, manifest_interface_instance, storage, True, require_empty_layer=True + ) + if blob_map is None: + return None + + # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy + # image. + legacy_image = None + if manifest_interface_instance.has_legacy_image: + try: + legacy_image_id = _populate_legacy_image( + repository_id, manifest_interface_instance, blob_map, retriever, True + ) + except ManifestException as me: + raise CreateManifestException( + "Attempt to create an invalid manifest: %s. Please report this issue." % me + ) + + if legacy_image_id is None: + return None + + legacy_image = get_image(repository_id, legacy_image_id) + if legacy_image is None: + return None + + # Set the legacy image (if applicable). + if legacy_image is not None: + ManifestLegacyImage.create( + repository=repository_id, image=legacy_image, manifest=manifest + ) + + def _populate_legacy_image( repository_id, manifest_interface_instance, blob_map, retriever, raise_on_error=False ): diff --git a/data/model/oci/tag.py b/data/model/oci/tag.py index 3a560c497..81b0e35cf 100644 --- a/data/model/oci/tag.py +++ b/data/model/oci/tag.py @@ -123,7 +123,14 @@ def list_repository_tag_history( Note that the returned Manifest will not contain the manifest contents. """ query = ( - Tag.select(Tag, Manifest.id, Manifest.digest, Manifest.media_type) + Tag.select( + Tag, + Manifest.id, + Manifest.digest, + Manifest.media_type, + Manifest.layers_compressed_size, + Manifest.config_media_type, + ) .join(Manifest) .where(Tag.repository == repository_id) .order_by(Tag.lifetime_start_ms.desc(), Tag.name) @@ -141,31 +148,14 @@ def list_repository_tag_history( if active_tags_only: query = filter_to_alive_tags(query) + else: + query = filter_to_visible_tags(query) - query = filter_to_visible_tags(query) results = list(query) return results[0:page_size], len(results) > page_size -def get_legacy_images_for_tags(tags): - """ - Returns a map from tag ID to the legacy image for the tag. 
- """ - if not tags: - return {} - - query = ( - ManifestLegacyImage.select(ManifestLegacyImage, Image, ImageStorage) - .join(Image) - .join(ImageStorage) - .where(ManifestLegacyImage.manifest << [tag.manifest_id for tag in tags]) - ) - - by_manifest = {mli.manifest_id: mli.image for mli in query} - return {tag.id: by_manifest[tag.manifest_id] for tag in tags if tag.manifest_id in by_manifest} - - def find_matching_tag(repository_id, tag_names, tag_kinds=None): """ Finds an alive tag in the specified repository with one of the specified tag names and returns @@ -417,7 +407,6 @@ def delete_tags_for_manifest(manifest): """ query = Tag.select().where(Tag.manifest == manifest) query = filter_to_alive_tags(query) - query = filter_to_visible_tags(query) tags = list(query) now_ms = get_epoch_timestamp_ms() @@ -446,9 +435,8 @@ def filter_to_alive_tags(query, now_ms=None, model=Tag): if now_ms is None: now_ms = get_epoch_timestamp_ms() - return query.where((model.lifetime_end_ms >> None) | (model.lifetime_end_ms > now_ms)).where( - model.hidden == False - ) + query = query.where((model.lifetime_end_ms >> None) | (model.lifetime_end_ms > now_ms)) + return filter_to_visible_tags(query) def set_tag_expiration_sec_for_manifest(manifest_id, expiration_seconds): @@ -578,70 +566,6 @@ def tags_containing_legacy_image(image): return filter_to_alive_tags(tags) -def lookup_notifiable_tags_for_legacy_image(docker_image_id, storage_uuid, event_name): - """ - Yields any alive Tags found in repositories with an event with the given name registered and - whose legacy Image has the given docker image ID and storage UUID. - """ - event = ExternalNotificationEvent.get(name=event_name) - images = ( - Image.select() - .join(ImageStorage) - .where(Image.docker_image_id == docker_image_id, ImageStorage.uuid == storage_uuid) - ) - - for image in list(images): - # Ensure the image is under a repository that supports the event. - try: - RepositoryNotification.get(repository=image.repository_id, event=event) - except RepositoryNotification.DoesNotExist: - continue - - # If found in a repository with the valid event, yield the tag(s) that contains the image. - for tag in tags_containing_legacy_image(image): - yield tag - - -def get_tags_for_legacy_image(image_id): - """ Returns the Tag's that have the associated legacy image. - - NOTE: This is for legacy support in the old security notification worker and should - be removed once that code is no longer necessary. - """ - return filter_to_alive_tags( - Tag.select() - .distinct() - .join(Manifest) - .join(ManifestLegacyImage) - .where(ManifestLegacyImage.image == image_id) - ) - - -def _filter_has_repository_event(query, event): - """ Filters the query by ensuring the repositories returned have the given event. - - NOTE: This is for legacy support in the old security notification worker and should - be removed once that code is no longer necessary. - """ - return ( - query.join(Repository) - .join(RepositoryNotification) - .where(RepositoryNotification.event == event) - ) - - -def filter_tags_have_repository_event(query, event): - """ Filters the query by ensuring the tags live in a repository that has the given - event. Also orders the results by lifetime_start_ms. - - NOTE: This is for legacy support in the old security notification worker and should - be removed once that code is no longer necessary. 
- """ - query = _filter_has_repository_event(query, event) - query = query.switch(Tag).order_by(Tag.lifetime_start_ms.desc()) - return query - - def find_repository_with_garbage(limit_to_gc_policy_s): """ Returns a repository that has garbage (defined as an expired Tag that is past the repo's namespace's expiration window) or None if none. @@ -680,3 +604,20 @@ def find_repository_with_garbage(limit_to_gc_policy_s): return None except Repository.DoesNotExist: return None + + +def get_legacy_images_for_tags(tags): + """ + Returns a map from tag ID to the legacy image for the tag. + """ + if not tags: + return {} + + query = ( + ManifestLegacyImage.select(ManifestLegacyImage, Image) + .join(Image) + .where(ManifestLegacyImage.manifest << [tag.manifest_id for tag in tags]) + ) + + by_manifest = {mli.manifest_id: mli.image for mli in query} + return {tag.id: by_manifest[tag.manifest_id] for tag in tags if tag.manifest_id in by_manifest} diff --git a/data/model/oci/test/test_oci_manifest.py b/data/model/oci/test/test_oci_manifest.py index ec71d2ddd..4d7fadae1 100644 --- a/data/model/oci/test/test_oci_manifest.py +++ b/data/model/oci/test/test_oci_manifest.py @@ -166,6 +166,8 @@ def test_get_or_create_manifest(schema_version, initialized_db): builder.add_layer(random_digest, len(random_data.encode("utf-8"))) sample_manifest_instance = builder.build() + assert sample_manifest_instance.layers_compressed_size is not None + # Create a new manifest. created_manifest = get_or_create_manifest(repository, sample_manifest_instance, storage) created = created_manifest.manifest @@ -177,15 +179,18 @@ def test_get_or_create_manifest(schema_version, initialized_db): assert created.digest == sample_manifest_instance.digest assert created.manifest_bytes == sample_manifest_instance.bytes.as_encoded_str() assert created_manifest.labels_to_apply == expected_labels + assert created.config_media_type == sample_manifest_instance.config_media_type + assert created.layers_compressed_size == sample_manifest_instance.layers_compressed_size + + # Lookup the manifest and verify. + found = lookup_manifest(repository, created.digest, allow_dead=True) + assert found.digest == created.digest + assert found.config_media_type == created.config_media_type + assert found.layers_compressed_size == created.layers_compressed_size # Verify it has a temporary tag pointing to it. assert Tag.get(manifest=created, hidden=True).lifetime_end_ms - # Verify the legacy image. - legacy_image = get_legacy_image_for_manifest(created) - assert legacy_image is not None - assert legacy_image.storage.content_checksum == random_digest - # Verify the linked blobs. blob_digests = [ mb.blob.content_checksum @@ -295,6 +300,8 @@ def test_get_or_create_manifest_list(initialized_db): assert created_list assert created_list.media_type.name == manifest_list.media_type assert created_list.digest == manifest_list.digest + assert created_list.config_media_type == manifest_list.config_media_type + assert created_list.layers_compressed_size == manifest_list.layers_compressed_size # Ensure the child manifest links exist. child_manifests = { @@ -423,6 +430,8 @@ def test_get_or_create_manifest_with_remote_layers(initialized_db): assert created_manifest assert created_manifest.media_type.name == manifest.media_type assert created_manifest.digest == manifest.digest + assert created_manifest.config_media_type == manifest.config_media_type + assert created_manifest.layers_compressed_size == manifest.layers_compressed_size # Verify the legacy image. 
legacy_image = get_legacy_image_for_manifest(created_manifest) diff --git a/data/model/oci/test/test_oci_tag.py b/data/model/oci/test/test_oci_tag.py index 360e134b2..8aef55df0 100644 --- a/data/model/oci/test/test_oci_tag.py +++ b/data/model/oci/test/test_oci_tag.py @@ -18,7 +18,6 @@ from data.model.oci.tag import ( get_most_recent_tag, get_most_recent_tag_lifetime_start, list_alive_tags, - get_legacy_images_for_tags, filter_to_alive_tags, filter_to_visible_tags, list_repository_tag_history, @@ -92,13 +91,6 @@ def test_list_alive_tags(initialized_db): for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())): tags = list_alive_tags(tag.repository) assert tag in tags - - with assert_query_count(1): - legacy_images = get_legacy_images_for_tags(tags) - - for tag in tags: - assert ManifestLegacyImage.get(manifest=tag.manifest).image == legacy_images[tag.id] - found = True assert found @@ -154,6 +146,11 @@ def test_list_repository_tag_history(namespace_name, repo_name, initialized_db): assert results assert not has_more + assert results[0].manifest.id is not None + assert results[0].manifest.digest is not None + assert results[0].manifest.media_type is not None + assert results[0].manifest.layers_compressed_size is not None + def test_list_repository_tag_history_with_history(initialized_db): repo = get_repository("devtable", "history") diff --git a/data/model/repo_mirror.py b/data/model/repo_mirror.py index ddd846ccf..2e65f0d6d 100644 --- a/data/model/repo_mirror.py +++ b/data/model/repo_mirror.py @@ -2,7 +2,7 @@ import re from datetime import datetime, timedelta -from peewee import IntegrityError, fn +from peewee import IntegrityError, fn, JOIN from jsonschema import ValidationError from data.database import ( @@ -14,6 +14,7 @@ from data.database import ( Repository, uuid_generator, db_transaction, + User, ) from data.fields import DecryptedValue from data.model import DataModelException @@ -362,7 +363,14 @@ def get_mirror(repository): Return the RepoMirrorConfig associated with the given Repository, or None if it doesn't exist. 
""" try: - return RepoMirrorConfig.get(repository=repository) + return ( + RepoMirrorConfig.select(RepoMirrorConfig, User, RepoMirrorRule) + .join(User, JOIN.LEFT_OUTER) + .switch(RepoMirrorConfig) + .join(RepoMirrorRule) + .where(RepoMirrorConfig.repository == repository) + .get() + ) except RepoMirrorConfig.DoesNotExist: return None diff --git a/data/model/repository.py b/data/model/repository.py index d0ff34b5f..423766cd0 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -32,7 +32,6 @@ from data.database import ( RepositoryActionCount, Role, RepositoryAuthorizedEmail, - DerivedStorageForImage, Label, db_for_update, get_epoch_timestamp, @@ -500,6 +499,10 @@ def lookup_repository(repo_id): return None +def repository_visibility_name(repository): + return "public" if is_repository_public(repository) else "private" + + def is_repository_public(repository): return repository.visibility_id == _basequery.get_public_repo_visibility().id diff --git a/data/model/storage.py b/data/model/storage.py index 94b96d7ab..061d9ee87 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -25,6 +25,7 @@ from data.database import ( ApprBlob, ensure_under_transaction, ManifestBlob, + UploadedBlob, ) logger = logging.getLogger(__name__) @@ -86,7 +87,13 @@ def _is_storage_orphaned(candidate_id): except Image.DoesNotExist: pass - return True + try: + UploadedBlob.get(blob=candidate_id) + return False + except UploadedBlob.DoesNotExist: + pass + + return True def garbage_collect_storage(storage_id_whitelist): @@ -307,57 +314,65 @@ def get_layer_path_for_storage(storage_uuid, cas_path, content_checksum): return store.blob_path(content_checksum) -def lookup_repo_storages_by_content_checksum(repo, checksums, by_manifest=False): +def lookup_repo_storages_by_content_checksum(repo, checksums, with_uploads=False): """ Looks up repository storages (without placements) matching the given repository and checksum. """ + checksums = list(set(checksums)) if not checksums: return [] + # If the request is not with uploads, simply return the blobs found under the manifests + # for the repository. + if not with_uploads: + return _lookup_repo_storages_by_content_checksum(repo, checksums, ManifestBlob) + + # Otherwise, first check the UploadedBlob table and, once done, then check the ManifestBlob + # table. + found_via_uploaded = list( + _lookup_repo_storages_by_content_checksum(repo, checksums, UploadedBlob) + ) + if len(found_via_uploaded) == len(checksums): + return found_via_uploaded + + checksums_remaining = set(checksums) - { + uploaded.content_checksum for uploaded in found_via_uploaded + } + found_via_manifest = list( + _lookup_repo_storages_by_content_checksum(repo, checksums_remaining, ManifestBlob) + ) + return found_via_uploaded + found_via_manifest + + +def _lookup_repo_storages_by_content_checksum(repo, checksums, model_class): + assert checksums + # There may be many duplicates of the checksums, so for performance reasons we are going # to use a union to select just one storage with each checksum queries = [] - for counter, checksum in enumerate(set(checksums)): + for counter, checksum in enumerate(checksums): query_alias = "q{0}".format(counter) - # TODO: Remove once we have a new-style model for tracking temp uploaded blobs and - # all legacy tables have been removed. 
- if by_manifest: - candidate_subq = ( - ImageStorage.select( - ImageStorage.id, - ImageStorage.content_checksum, - ImageStorage.image_size, - ImageStorage.uuid, - ImageStorage.cas_path, - ImageStorage.uncompressed_size, - ImageStorage.uploading, - ) - .join(ManifestBlob) - .where(ManifestBlob.repository == repo, ImageStorage.content_checksum == checksum) - .limit(1) - .alias(query_alias) - ) - else: - candidate_subq = ( - ImageStorage.select( - ImageStorage.id, - ImageStorage.content_checksum, - ImageStorage.image_size, - ImageStorage.uuid, - ImageStorage.cas_path, - ImageStorage.uncompressed_size, - ImageStorage.uploading, - ) - .join(Image) - .where(Image.repository == repo, ImageStorage.content_checksum == checksum) - .limit(1) - .alias(query_alias) + candidate_subq = ( + ImageStorage.select( + ImageStorage.id, + ImageStorage.content_checksum, + ImageStorage.image_size, + ImageStorage.uuid, + ImageStorage.cas_path, + ImageStorage.uncompressed_size, + ImageStorage.uploading, ) + .join(model_class) + .where(model_class.repository == repo, ImageStorage.content_checksum == checksum) + .limit(1) + .alias(query_alias) + ) queries.append(ImageStorage.select(SQL("*")).from_(candidate_subq)) + assert queries return _basequery.reduce_as_tree(queries) diff --git a/data/model/tag.py b/data/model/tag.py index 061e96604..e64cbcf3e 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -1,75 +1,9 @@ -import logging - -from calendar import timegm -from datetime import datetime -from uuid import uuid4 - -from peewee import IntegrityError, JOIN, fn -from data.model import ( - image, - storage, - db_transaction, - DataModelException, - _basequery, - InvalidManifestException, - TagAlreadyCreatedException, - StaleTagException, - config, -) from data.database import ( RepositoryTag, Repository, - RepositoryState, - Image, - ImageStorage, Namespace, - TagManifest, - RepositoryNotification, - Label, - TagManifestLabel, get_epoch_timestamp, - db_for_update, - Manifest, - ManifestLabel, - ManifestBlob, - ManifestLegacyImage, - TagManifestToManifest, - TagManifestLabelMap, - TagToRepositoryTag, - Tag, - get_epoch_timestamp_ms, ) -from util.timedeltastring import convert_to_timedelta - - -logger = logging.getLogger(__name__) - - -def create_temporary_hidden_tag(repo, image_obj, expiration_s): - """ - Create a tag with a defined timeline, that will not appear in the UI or CLI. - - Returns the name of the temporary tag or None on error. - """ - now_ts = get_epoch_timestamp() - expire_ts = now_ts + expiration_s - tag_name = str(uuid4()) - - # Ensure the repository is not marked for deletion. - with db_transaction(): - current = Repository.get(id=repo) - if current.state == RepositoryState.MARKED_FOR_DELETION: - return None - - RepositoryTag.create( - repository=repo, - image=image_obj, - name=tag_name, - lifetime_start_ts=now_ts, - lifetime_end_ts=expire_ts, - hidden=True, - ) - return tag_name def lookup_unrecoverable_tags(repo): diff --git a/data/model/test/test_gc.py b/data/model/test/test_gc.py index eb91341df..f102155f0 100644 --- a/data/model/test/test_gc.py +++ b/data/model/test/test_gc.py @@ -30,6 +30,7 @@ from data.database import ( TagToRepositoryTag, ImageStorageLocation, RepositoryTag, + UploadedBlob, ) from data.model.oci.test.test_oci_manifest import create_manifest_for_testing from digest.digest_tools import sha256_digest @@ -61,11 +62,7 @@ def default_tag_policy(initialized_db): def _delete_temp_links(repo): """ Deletes any temp links to blobs. 
""" - for hidden in list( - RepositoryTag.select().where(RepositoryTag.hidden == True, RepositoryTag.repository == repo) - ): - hidden.delete_instance() - hidden.image.delete_instance() + UploadedBlob.delete().where(UploadedBlob.repository == repo).execute() def _populate_blob(repo, content): @@ -128,6 +125,10 @@ def move_tag(repository, tag, image_ids, expect_gc=True): repo_ref, manifest, tag, storage, raise_on_error=True ) + tag_ref = registry_model.get_repo_tag(repo_ref, tag) + manifest_ref = registry_model.get_manifest_for_tag(tag_ref) + registry_model.populate_legacy_images_for_testing(manifest_ref, storage) + if expect_gc: assert model.gc.garbage_collect_repo(repository) == expect_gc @@ -156,10 +157,17 @@ def _get_dangling_storage_count(): storage_ids = set([current.id for current in ImageStorage.select()]) referenced_by_image = set([image.storage_id for image in Image.select()]) referenced_by_manifest = set([blob.blob_id for blob in ManifestBlob.select()]) - referenced_by_derived = set( + referenced_by_uploaded = set([upload.blob_id for upload in UploadedBlob.select()]) + referenced_by_derived_image = set( [derived.derivative_id for derived in DerivedStorageForImage.select()] ) - return len(storage_ids - referenced_by_image - referenced_by_derived - referenced_by_manifest) + return len( + storage_ids + - referenced_by_image + - referenced_by_derived_image + - referenced_by_manifest + - referenced_by_uploaded + ) def _get_dangling_label_count(): @@ -199,7 +207,7 @@ def assert_gc_integrity(expect_storage_removed=True): for blob_row in ApprBlob.select(): existing_digests.add(blob_row.digest) - # Store the number of dangling storages and labels. + # Store the number of dangling objects. existing_storage_count = _get_dangling_storage_count() existing_label_count = _get_dangling_label_count() existing_manifest_count = _get_dangling_manifest_count() @@ -247,6 +255,13 @@ def assert_gc_integrity(expect_storage_removed=True): .count() ) + if shared == 0: + shared = ( + UploadedBlob.select() + .where(UploadedBlob.blob == removed_image_and_storage.storage_id) + .count() + ) + if shared == 0: with pytest.raises(ImageStorage.DoesNotExist): ImageStorage.get(id=removed_image_and_storage.storage_id) @@ -672,6 +687,10 @@ def test_images_shared_cas(default_tag_policy, initialized_db): repo_ref, manifest, "first", storage, raise_on_error=True ) + tag_ref = registry_model.get_repo_tag(repo_ref, "first") + manifest_ref = registry_model.get_manifest_for_tag(tag_ref) + registry_model.populate_legacy_images_for_testing(manifest_ref, storage) + # Store another as `second`. builder = DockerSchema1ManifestBuilder( repository.namespace_user.username, repository.name, "second" @@ -682,6 +701,10 @@ def test_images_shared_cas(default_tag_policy, initialized_db): repo_ref, manifest, "second", storage, raise_on_error=True ) + tag_ref = registry_model.get_repo_tag(repo_ref, "second") + manifest_ref = registry_model.get_manifest_for_tag(tag_ref) + registry_model.populate_legacy_images_for_testing(manifest_ref, storage) + # Manually retarget the second manifest's blob to the second row. 
try: second_blob = ManifestBlob.get(manifest=created._db_id, blob=is1) diff --git a/data/model/test/test_image.py b/data/model/test/test_image.py deleted file mode 100644 index 267c46c7c..000000000 --- a/data/model/test/test_image.py +++ /dev/null @@ -1,109 +0,0 @@ -import pytest - -from collections import defaultdict -from data.model import image, repository -from playhouse.test_utils import assert_query_count - -from test.fixtures import * - - -@pytest.fixture() -def images(initialized_db): - images = image.get_repository_images("devtable", "simple") - assert len(images) - return images - - -def test_get_image_with_storage(images, initialized_db): - for current in images: - storage_uuid = current.storage.uuid - - with assert_query_count(1): - retrieved = image.get_image_with_storage(current.docker_image_id, storage_uuid) - assert retrieved.id == current.id - assert retrieved.storage.uuid == storage_uuid - - -def test_get_parent_images(images, initialized_db): - for current in images: - if not len(current.ancestor_id_list()): - continue - - with assert_query_count(1): - parent_images = list(image.get_parent_images("devtable", "simple", current)) - - assert len(parent_images) == len(current.ancestor_id_list()) - assert set(current.ancestor_id_list()) == {i.id for i in parent_images} - - for parent in parent_images: - with assert_query_count(0): - assert parent.storage.id - - -def test_get_image(images, initialized_db): - for current in images: - repo = current.repository - - with assert_query_count(1): - found = image.get_image(repo, current.docker_image_id) - - assert found.id == current.id - - -def test_placements(images, initialized_db): - with assert_query_count(1): - placements_map = image.get_placements_for_images(images) - - for current in images: - assert current.storage.id in placements_map - - with assert_query_count(2): - expected_image, expected_placements = image.get_image_and_placements( - "devtable", "simple", current.docker_image_id - ) - - assert expected_image.id == current.id - assert len(expected_placements) == len(placements_map.get(current.storage.id)) - assert {p.id for p in expected_placements} == { - p.id for p in placements_map.get(current.storage.id) - } - - -def test_get_repo_image(images, initialized_db): - for current in images: - with assert_query_count(1): - found = image.get_repo_image("devtable", "simple", current.docker_image_id) - - assert found.id == current.id - with assert_query_count(1): - assert found.storage.id - - -def test_get_repo_image_and_storage(images, initialized_db): - for current in images: - with assert_query_count(1): - found = image.get_repo_image_and_storage("devtable", "simple", current.docker_image_id) - - assert found.id == current.id - with assert_query_count(0): - assert found.storage.id - - -def test_get_repository_images_without_placements(images, initialized_db): - ancestors_map = defaultdict(list) - for img in images: - current = img.parent - while current is not None: - ancestors_map[current.id].append(img.id) - current = current.parent - - for current in images: - repo = current.repository - - with assert_query_count(1): - found = list( - image.get_repository_images_without_placements(repo, with_ancestor=current) - ) - - assert len(found) == len(ancestors_map[current.id]) + 1 - assert {i.id for i in found} == set(ancestors_map[current.id] + [current.id]) diff --git a/data/model/test/test_tag.py b/data/model/test/test_tag.py deleted file mode 100644 index cfb7bede0..000000000 --- a/data/model/test/test_tag.py +++ /dev/null 
@@ -1,23 +0,0 @@ -import pytest - -from data.database import ( - RepositoryState, - Image, -) - -from test.fixtures import * - - -def test_create_temp_tag(initialized_db): - repo = model.repository.get_repository("devtable", "simple") - image = Image.get(repository=repo) - assert model.tag.create_temporary_hidden_tag(repo, image, 10000000) is not None - - -def test_create_temp_tag_deleted_repo(initialized_db): - repo = model.repository.get_repository("devtable", "simple") - repo.state = RepositoryState.MARKED_FOR_DELETION - repo.save() - - image = Image.get(repository=repo) - assert model.tag.create_temporary_hidden_tag(repo, image, 10000000) is None diff --git a/data/model/user.py b/data/model/user.py index 402b6a779..4c0064fa6 100644 --- a/data/model/user.py +++ b/data/model/user.py @@ -1326,7 +1326,11 @@ def get_region_locations(user): """ Returns the locations defined as preferred storage for the given user. """ - query = UserRegion.select().join(ImageStorageLocation).where(UserRegion.user == user) + query = ( + UserRegion.select(UserRegion, ImageStorageLocation) + .join(ImageStorageLocation) + .where(UserRegion.user == user) + ) return set([region.location.name for region in query]) diff --git a/data/registry_model/__init__.py b/data/registry_model/__init__.py index 26eadcd5d..140ede42f 100644 --- a/data/registry_model/__init__.py +++ b/data/registry_model/__init__.py @@ -13,6 +13,9 @@ class RegistryModelProxy(object): def __getattr__(self, attr): return getattr(self._model, attr) + def set_id_hash_salt(self, hash_salt): + self._model.set_id_hash_salt(hash_salt) + registry_model = RegistryModelProxy() logger.info("===============================") diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index fcec97953..e36a14e7d 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -1,4 +1,5 @@ import hashlib +import json from collections import namedtuple from enum import Enum, unique @@ -172,8 +173,8 @@ class Label(datatype("Label", ["key", "value", "uuid", "source_type_name", "medi key=label.key, value=label.value, uuid=label.uuid, - media_type_name=label.media_type.name, - source_type_name=label.source_type.name, + media_type_name=model.label.get_media_types()[label.media_type_id], + source_type_name=model.label.get_label_source_types()[label.source_type_id], ) @@ -189,13 +190,6 @@ class ShallowTag(datatype("ShallowTag", ["name"])): return ShallowTag(db_id=tag.id, name=tag.name) - @classmethod - def for_repository_tag(cls, repository_tag): - if repository_tag is None: - return None - - return ShallowTag(db_id=repository_tag.id, name=repository_tag.name) - @property def id(self): """ @@ -223,7 +217,7 @@ class Tag( """ @classmethod - def for_tag(cls, tag, legacy_image=None): + def for_tag(cls, tag, legacy_id_handler, manifest_row=None, legacy_image_row=None): if tag is None: return None @@ -235,55 +229,34 @@ class Tag( lifetime_end_ms=tag.lifetime_end_ms, lifetime_start_ts=tag.lifetime_start_ms // 1000, lifetime_end_ts=tag.lifetime_end_ms // 1000 if tag.lifetime_end_ms else None, - manifest_digest=tag.manifest.digest, + manifest_digest=manifest_row.digest if manifest_row else tag.manifest.digest, inputs=dict( - legacy_image=legacy_image, - manifest=tag.manifest, + legacy_id_handler=legacy_id_handler, + legacy_image_row=legacy_image_row, + manifest_row=manifest_row or tag.manifest, repository=RepositoryReference.for_id(tag.repository_id), ), ) - @classmethod - def for_repository_tag(cls, repository_tag, manifest_digest=None, 
legacy_image=None): - if repository_tag is None: - return None - - return Tag( - db_id=repository_tag.id, - name=repository_tag.name, - reversion=repository_tag.reversion, - lifetime_start_ts=repository_tag.lifetime_start_ts, - lifetime_end_ts=repository_tag.lifetime_end_ts, - lifetime_start_ms=repository_tag.lifetime_start_ts * 1000, - lifetime_end_ms=( - repository_tag.lifetime_end_ts * 1000 if repository_tag.lifetime_end_ts else None - ), - manifest_digest=manifest_digest, - inputs=dict( - legacy_image=legacy_image, - repository=RepositoryReference.for_id(repository_tag.repository_id), - ), - ) + @property + @requiresinput("manifest_row") + def _manifest_row(self, manifest_row): + """ + Returns the database Manifest object for this tag. + """ + return manifest_row @property - @requiresinput("manifest") - def _manifest(self, manifest): + @requiresinput("manifest_row") + @requiresinput("legacy_id_handler") + @optionalinput("legacy_image_row") + def manifest(self, manifest_row, legacy_id_handler, legacy_image_row): """ Returns the manifest for this tag. - - Will only apply to new-style OCI tags. """ - return manifest - - @property - @optionalinput("manifest") - def manifest(self, manifest): - """ - Returns the manifest for this tag or None if none. - - Will only apply to new-style OCI tags. - """ - return Manifest.for_manifest(manifest, self.legacy_image_if_present) + return Manifest.for_manifest( + manifest_row, legacy_id_handler, legacy_image_row=legacy_image_row + ) @property @requiresinput("repository") @@ -293,28 +266,6 @@ class Tag( """ return repository - @property - @requiresinput("legacy_image") - def legacy_image(self, legacy_image): - """ - Returns the legacy Docker V1-style image for this tag. - - Note that this will be None for tags whose manifests point to other manifests instead of - images. - """ - return legacy_image - - @property - @optionalinput("legacy_image") - def legacy_image_if_present(self, legacy_image): - """ - Returns the legacy Docker V1-style image for this tag. - - Note that this will be None for tags whose manifests point to other manifests instead of - images. - """ - return legacy_image - @property def id(self): """ @@ -322,31 +273,32 @@ class Tag( """ return self._db_id + @property + def manifest_layers_size(self): + """ Returns the compressed size of the layers of the manifest for the Tag or + None if none applicable or loaded. + """ + return self.manifest.layers_compressed_size -class Manifest(datatype("Manifest", ["digest", "media_type", "internal_manifest_bytes"])): + +class Manifest( + datatype( + "Manifest", + [ + "digest", + "media_type", + "config_media_type", + "_layers_compressed_size", + "internal_manifest_bytes", + ], + ) +): """ Manifest represents a manifest in a repository. """ @classmethod - def for_tag_manifest(cls, tag_manifest, legacy_image=None): - if tag_manifest is None: - return None - - return Manifest( - db_id=tag_manifest.id, - digest=tag_manifest.digest, - internal_manifest_bytes=Bytes.for_string_or_unicode(tag_manifest.json_data), - media_type=DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, # Always in legacy. 
- inputs=dict( - legacy_image=legacy_image, - tag_manifest=True, - repository=RepositoryReference.for_id(tag_manifest.repository_id), - ), - ) - - @classmethod - def for_manifest(cls, manifest, legacy_image): + def for_manifest(cls, manifest, legacy_id_handler, legacy_image_row=None): if manifest is None: return None @@ -361,36 +313,15 @@ class Manifest(datatype("Manifest", ["digest", "media_type", "internal_manifest_ digest=manifest.digest, internal_manifest_bytes=manifest_bytes, media_type=ManifestTable.media_type.get_name(manifest.media_type_id), + _layers_compressed_size=manifest.layers_compressed_size, + config_media_type=manifest.config_media_type, inputs=dict( - legacy_image=legacy_image, - tag_manifest=False, + legacy_id_handler=legacy_id_handler, + legacy_image_row=legacy_image_row, repository=RepositoryReference.for_id(manifest.repository_id), ), ) - @property - @requiresinput("tag_manifest") - def _is_tag_manifest(self, tag_manifest): - return tag_manifest - - @property - @requiresinput("legacy_image") - def legacy_image(self, legacy_image): - """ - Returns the legacy Docker V1-style image for this manifest. - """ - return legacy_image - - @property - @optionalinput("legacy_image") - def legacy_image_if_present(self, legacy_image): - """ - Returns the legacy Docker V1-style image for this manifest. - - Note that this will be None for manifests that point to other manifests instead of images. - """ - return legacy_image - def get_parsed_manifest(self, validate=True): """ Returns the parsed manifest for this manifest. @@ -400,17 +331,6 @@ class Manifest(datatype("Manifest", ["digest", "media_type", "internal_manifest_ self.internal_manifest_bytes, self.media_type, validate=validate ) - @property - def layers_compressed_size(self): - """ - Returns the total compressed size of the layers in the manifest or None if this could not be - computed. - """ - try: - return self.get_parsed_manifest().layers_compressed_size - except ManifestException: - return None - @property def is_manifest_list(self): """ @@ -426,9 +346,67 @@ class Manifest(datatype("Manifest", ["digest", "media_type", "internal_manifest_ """ return repository + @optionalinput("legacy_image_row") + def _legacy_image_row(self, legacy_image_row): + return legacy_image_row + + @property + def layers_compressed_size(self): + # TODO: Simplify once we've stopped writing Image rows and we've backfilled the + # sizes. + + # First check the manifest itself, as all newly written manifests will have the + # size. + if self._layers_compressed_size is not None: + return self._layers_compressed_size + + # Secondly, check for the size of the legacy Image row. + legacy_image_row = self._legacy_image_row + if legacy_image_row: + return legacy_image_row.aggregate_size + + # Otherwise, return None. + return None + + @property + @requiresinput("legacy_id_handler") + def legacy_image_root_id(self, legacy_id_handler): + """ + Returns the legacy Docker V1-style image ID for this manifest. Note that an ID will + be returned even if the manifest does not support a legacy image. + """ + return legacy_id_handler.encode(self._db_id) + + def as_manifest(self): + """ Returns the manifest or legacy image as a manifest. """ + return self + + @property + @requiresinput("legacy_id_handler") + def _legacy_id_handler(self, legacy_id_handler): + return legacy_id_handler + + def lookup_legacy_image(self, layer_index, retriever): + """ Looks up and returns the legacy image for index-th layer in this manifest + or None if none. 
The indexes here are from leaf to root, with index 0 being + the leaf. + """ + # Retrieve the schema1 manifest. If none exists, legacy images are not supported. + parsed = self.get_parsed_manifest() + if parsed is None: + return None + + schema1 = parsed.get_schema1_manifest("$namespace", "$repo", "$tag", retriever) + if schema1 is None: + return None + + return LegacyImage.for_schema1_manifest_layer_index( + self, schema1, layer_index, self._legacy_id_handler + ) + class LegacyImage( - datatype( + namedtuple( "LegacyImage", [ "docker_image_id", @@ -437,8 +415,14 @@ class LegacyImage( "command", "image_size", "aggregate_size", - "uploading", + "blob", + "blob_digest", "v1_metadata_string", + # Internal fields. + "layer_index", + "manifest", + "parsed_manifest", + "id_handler", ], ) ): @@ -447,74 +431,80 @@ class LegacyImage( """ @classmethod - def for_image(cls, image, images_map=None, tags_map=None, blob=None): - if image is None: + def for_schema1_manifest_layer_index( + cls, manifest, parsed_manifest, layer_index, id_handler, blob=None + ): + assert parsed_manifest.schema_version == 1 + layers = parsed_manifest.layers + if layer_index >= len(layers): + return None + + # NOTE: Schema1 keeps its layers in the order from base to leaf, so we have + # to reverse our lookup order. + leaf_to_base = list(reversed(layers)) + + aggregated_size = sum( + [ + l.compressed_size + for index, l in enumerate(leaf_to_base) + if index >= layer_index and l.compressed_size is not None + ] + ) + + layer = leaf_to_base[layer_index] + synthetic_layer_id = id_handler.encode(manifest._db_id, layer_index) + + # Replace the image ID and parent ID with our synethetic IDs. + try: + parsed = json.loads(layer.raw_v1_metadata) + parsed["id"] = synthetic_layer_id + if layer_index < len(leaf_to_base) - 1: + parsed["parent"] = id_handler.encode(manifest._db_id, layer_index + 1) + except (ValueError, TypeError): return None return LegacyImage( - db_id=image.id, - inputs=dict( - images_map=images_map, - tags_map=tags_map, - ancestor_id_list=image.ancestor_id_list(), - blob=blob, - ), - docker_image_id=image.docker_image_id, - created=image.created, - comment=image.comment, - command=image.command, - v1_metadata_string=image.v1_json_metadata, - image_size=image.storage.image_size, - aggregate_size=image.aggregate_size, - uploading=image.storage.uploading, + docker_image_id=synthetic_layer_id, + created=layer.v1_metadata.created, + comment=layer.v1_metadata.comment, + command=layer.v1_metadata.command, + image_size=layer.compressed_size, + aggregate_size=aggregated_size, + blob=blob, + blob_digest=layer.digest, + v1_metadata_string=json.dumps(parsed), + layer_index=layer_index, + manifest=manifest, + parsed_manifest=parsed_manifest, + id_handler=id_handler, ) - @property - def id(self): - """ - Returns the database ID of the legacy image. - """ - return self._db_id + def with_blob(self, blob): + """ Sets the blob for the legacy image. """ + return self._replace(blob=blob) @property - @requiresinput("images_map") - @requiresinput("ancestor_id_list") - def parents(self, images_map, ancestor_id_list): - """ - Returns the parent images for this image. + def parent_image_id(self): + ancestor_ids = self.ancestor_ids + if not ancestor_ids: + return None - Raises an exception if the parents have not been loaded before this property is invoked. - Parents are returned starting at the leaf image. 
- """ - return [ - LegacyImage.for_image(images_map[ancestor_id], images_map=images_map) - for ancestor_id in reversed(ancestor_id_list) - if images_map.get(ancestor_id) - ] + return ancestor_ids[-1] @property - @requiresinput("blob") - def blob(self, blob): - """ - Returns the blob for this image. - - Raises an exception if the blob has not been loaded before this property is invoked. - """ - return blob + def ancestor_ids(self): + ancestor_ids = [] + for layer_index in range(self.layer_index + 1, len(self.parsed_manifest.layers)): + ancestor_ids.append(self.id_handler.encode(self.manifest._db_id, layer_index)) + return ancestor_ids @property - @requiresinput("tags_map") - def tags(self, tags_map): - """ - Returns the tags pointing to this image. + def full_image_id_chain(self): + return [self.docker_image_id] + self.ancestor_ids - Raises an exception if the tags have not been loaded before this property is invoked. - """ - tags = tags_map.get(self._db_id) - if not tags: - return [] - - return [Tag.for_tag(tag) for tag in tags] + def as_manifest(self): + """ Returns the parent manifest for the legacy image. """ + return self.manifest @unique @@ -579,7 +569,6 @@ class Blob( """ Returns the path of this blob in storage. """ - # TODO: change this to take in the storage engine? return storage_path @property @@ -591,27 +580,6 @@ class Blob( return placements -class DerivedImage(datatype("DerivedImage", ["verb", "varying_metadata", "blob"])): - """ - DerivedImage represents an image derived from a manifest via some form of verb. - """ - - @classmethod - def for_derived_storage(cls, derived, verb, varying_metadata, blob): - return DerivedImage( - db_id=derived.id, verb=verb, varying_metadata=varying_metadata, blob=blob - ) - - @property - def unique_id(self): - """ - Returns a unique ID for this derived image. - - This call will consistently produce the same unique ID across calls in the same code base. - """ - return hashlib.sha256(("%s:%s" % (self.verb, self._db_id)).encode("utf-8")).hexdigest() - - class BlobUpload( datatype( "BlobUpload", @@ -662,13 +630,6 @@ class LikelyVulnerableTag(datatype("LikelyVulnerableTag", ["layer_id", "name"])) db_id=tag.id, name=tag.name, layer_id=layer_id, inputs=dict(repository=repository) ) - @classmethod - def for_repository_tag(cls, tag, repository): - tag_layer_id = "%s.%s" % (tag.image.docker_image_id, tag.image.storage.uuid) - return LikelyVulnerableTag( - db_id=tag.id, name=tag.name, layer_id=tag_layer_id, inputs=dict(repository=repository) - ) - @property @requiresinput("repository") def repository(self, repository): diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index 26c5b1519..c70b2a7b7 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -14,16 +14,13 @@ class RegistryDataInterface(object): @abstractmethod def get_tag_legacy_image_id(self, repository_ref, tag_name, storage): """ - Returns the legacy image ID for the tag with a legacy images in the repository. - - Returns None if None. + Returns the legacy image ID for the tag in the repository or None if none. """ @abstractmethod def get_legacy_tags_map(self, repository_ref, storage): """ - Returns a map from tag name to its legacy image ID, for all tags with legacy images in the - repository. + Returns a map from tag name to its legacy image ID, for all tags in the repository. Note that this can be a *very* heavy operation. 
""" @@ -51,19 +48,14 @@ class RegistryDataInterface(object): """ @abstractmethod - def get_manifest_for_tag(self, tag, backfill_if_necessary=False, include_legacy_image=False): + def get_manifest_for_tag(self, tag): """ Returns the manifest associated with the given tag. """ @abstractmethod def lookup_manifest_by_digest( - self, - repository_ref, - manifest_digest, - allow_dead=False, - include_legacy_image=False, - require_available=False, + self, repository_ref, manifest_digest, allow_dead=False, require_available=False, ): """ Looks up the manifest with the given digest under the given repository and returns it or @@ -92,15 +84,7 @@ class RegistryDataInterface(object): """ @abstractmethod - def get_legacy_images(self, repository_ref): - """ - Returns an iterator of all the LegacyImage's defined in the matching repository. - """ - - @abstractmethod - def get_legacy_image( - self, repository_ref, docker_image_id, include_parents=False, include_blob=False - ): + def get_legacy_image(self, repository_ref, docker_image_id, storage, include_blob=False): """ Returns the matching LegacyImages under the matching repository, if any. @@ -170,12 +154,12 @@ class RegistryDataInterface(object): """ @abstractmethod - def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False): + def list_all_active_repository_tags(self, repository_ref): """ Returns a list of all the active tags in the repository. Note that this is a *HEAVY* operation on repositories with a lot of tags, and should only be - used for testing or where other more specific operations are not possible. + used for testing or legacy operations. """ @abstractmethod @@ -204,7 +188,7 @@ class RegistryDataInterface(object): """ @abstractmethod - def get_repo_tag(self, repository_ref, tag_name, include_legacy_image=False): + def get_repo_tag(self, repository_ref, tag_name): """ Returns the latest, *active* tag found in the repository, with the matching name or None if none. @@ -259,12 +243,6 @@ class RegistryDataInterface(object): previous expiration timestamp in seconds (if any), and whether the operation succeeded. """ - @abstractmethod - def get_legacy_images_owned_by_tag(self, tag): - """ - Returns all legacy images *solely owned and used* by the given tag. - """ - @abstractmethod def get_security_status(self, manifest_or_legacy_image): """ @@ -319,57 +297,6 @@ class RegistryDataInterface(object): `image.docker.types.ManifestImageLayer`. Should not be called for a manifest list. """ - @abstractmethod - def lookup_derived_image( - self, manifest, verb, storage, varying_metadata=None, include_placements=False - ): - """ - Looks up the derived image for the given manifest, verb and optional varying metadata and - returns it or None if none. - """ - - @abstractmethod - def lookup_or_create_derived_image( - self, - manifest, - verb, - storage_location, - storage, - varying_metadata=None, - include_placements=False, - ): - """ - Looks up the derived image for the given maniest, verb and optional varying metadata and - returns it. - - If none exists, a new derived image is created. - """ - - @abstractmethod - def get_derived_image_signature(self, derived_image, signer_name): - """ - Returns the signature associated with the derived image and a specific signer or None if - none. - """ - - @abstractmethod - def set_derived_image_signature(self, derived_image, signer_name, signature): - """ - Sets the calculated signature for the given derived image and signer to that specified. 
- """ - - @abstractmethod - def delete_derived_image(self, derived_image): - """ - Deletes a derived image and all of its storage. - """ - - @abstractmethod - def set_derived_image_size(self, derived_image, compressed_size): - """ - Sets the compressed size on the given derived image. - """ - @abstractmethod def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False): """ @@ -474,17 +401,14 @@ class RegistryDataInterface(object): If not possible, or an error occurs, returns None. """ - @abstractmethod - def yield_tags_for_vulnerability_notification(self, layer_id_pairs): - """ - Yields tags that contain one (or more) of the given layer ID pairs, in repositories which - have been registered for vulnerability_found notifications. - - Returns an iterator of LikelyVulnerableTag instances. - """ - @abstractmethod def find_repository_with_garbage(self, limit_to_gc_policy_s): """ Returns a repository reference to a repository that contains garbage for collection or None if none. """ + + @abstractmethod + def populate_legacy_images_for_testing(self, manifest, storage): + """ Populates legacy images for the given manifest, for testing only. This call + will fail if called under non-testing code. + """ diff --git a/data/registry_model/manifestbuilder.py b/data/registry_model/manifestbuilder.py index 4946974b2..750d9d1bc 100644 --- a/data/registry_model/manifestbuilder.py +++ b/data/registry_model/manifestbuilder.py @@ -85,8 +85,8 @@ class _ManifestBuilder(object): Returns the tags committed by this builder, if any. """ return [ - registry_model.get_repo_tag(self._repository_ref, tag_name, include_legacy_image=True) - for tag_name in list(self._builder_state.tags.keys()) + registry_model.get_repo_tag(self._repository_ref, tag_name) + for tag_name in self._builder_state.tags.keys() ] def start_layer( diff --git a/data/registry_model/registry_oci_model.py b/data/registry_model/registry_oci_model.py index cfa4c314e..3acbeaeab 100644 --- a/data/registry_model/registry_oci_model.py +++ b/data/registry_model/registry_oci_model.py @@ -25,13 +25,13 @@ from data.registry_model.datatypes import ( SecurityScanStatus, Blob, BlobUpload, - DerivedImage, ShallowTag, LikelyVulnerableTag, RepositoryReference, ManifestLayer, ) from data.registry_model.label_handlers import apply_label_to_manifest +from data.registry_model.shared import SyntheticIDHandler from image.shared import ManifestException from image.docker.schema1 import ( DOCKER_SCHEMA1_CONTENT_TYPES, @@ -42,9 +42,6 @@ from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST logger = logging.getLogger(__name__) -# The maximum size for generated manifest after which we remove extra metadata. -MAXIMUM_GENERATED_MANIFEST_SIZE = 3 * 1024 * 1024 # 3 MB - class OCIModel(RegistryDataInterface): """ @@ -52,78 +49,71 @@ class OCIModel(RegistryDataInterface): changed to support the OCI specification. 
""" + def __init__(self): + self._legacy_image_id_handler = SyntheticIDHandler() + + def set_id_hash_salt(self, id_hash_salt): + self._legacy_image_id_handler = SyntheticIDHandler(id_hash_salt) + + def _resolve_legacy_image_id_to_manifest_row(self, legacy_image_id): + decoded = self._legacy_image_id_handler.decode(legacy_image_id) + if len(decoded) == 0: + return (None, None) + + manifest_id, layer_index = decoded + if manifest_id is None: + return (None, None) + + try: + return database.Manifest.get(id=manifest_id), layer_index + except database.Manifest.DoesNotExist: + return (None, None) + + def _resolve_legacy_image_id(self, legacy_image_id): + """ Decodes the given legacy image ID and returns the manifest to which it points, + as well as the layer index for the image. If invalid, or the manifest was not found, + returns (None, None). + """ + manifest, layer_index = self._resolve_legacy_image_id_to_manifest_row(legacy_image_id) + if manifest is None: + return (None, None) + + return Manifest.for_manifest(manifest, self._legacy_image_id_handler), layer_index + def get_tag_legacy_image_id(self, repository_ref, tag_name, storage): """ - Returns the legacy image ID for the tag with a legacy images in the repository. - - Returns None if None. + Returns the legacy image ID for the tag in the repository. If there is no legacy image, + returns None. """ - tag = self.get_repo_tag(repository_ref, tag_name, include_legacy_image=True) + tag = self.get_repo_tag(repository_ref, tag_name) if tag is None: return None - if tag.legacy_image_if_present is not None: - return tag.legacy_image_if_present.docker_image_id + retriever = RepositoryContentRetriever(repository_ref.id, storage) + legacy_image = tag.manifest.lookup_legacy_image(0, retriever) + if legacy_image is None: + return None - if tag.manifest.is_manifest_list: - # See if we can lookup a schema1 legacy image. - v1_compatible = self.get_schema1_parsed_manifest(tag.manifest, "", "", "", storage) - if v1_compatible is not None: - return v1_compatible.leaf_layer_v1_image_id - - return None + return legacy_image.docker_image_id def get_legacy_tags_map(self, repository_ref, storage): """ - Returns a map from tag name to its legacy image ID, for all tags with legacy images in the + Returns a map from tag name to its legacy image ID, for all tags in the repository. Note that this can be a *very* heavy operation. """ tags = oci.tag.list_alive_tags(repository_ref._db_id) - legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) - tags_map = {} for tag in tags: - legacy_image = legacy_images_map.get(tag.id) - if legacy_image is not None: - tags_map[tag.name] = legacy_image.docker_image_id - else: - manifest = Manifest.for_manifest(tag.manifest, None) - if legacy_image is None and manifest.is_manifest_list: - # See if we can lookup a schema1 legacy image. - v1_compatible = self.get_schema1_parsed_manifest(manifest, "", "", "", storage) - if v1_compatible is not None: - v1_id = v1_compatible.leaf_layer_v1_image_id - if v1_id is not None: - tags_map[tag.name] = v1_id + root_id = Manifest.for_manifest( + tag.manifest, self._legacy_image_id_handler + ).legacy_image_root_id + if root_id is not None: + tags_map[tag.name] = root_id return tags_map - def _get_legacy_compatible_image_for_manifest(self, manifest, storage): - # Check for a legacy image directly on the manifest. 
- if not manifest.is_manifest_list: - return oci.shared.get_legacy_image_for_manifest(manifest._db_id) - - # Otherwise, lookup a legacy image associated with the v1-compatible manifest - # in the list. - try: - manifest_obj = database.Manifest.get(id=manifest._db_id) - except database.Manifest.DoesNotExist: - logger.exception("Could not find manifest for manifest `%s`", manifest._db_id) - return None - - # See if we can lookup a schema1 legacy image. - v1_compatible = self.get_schema1_parsed_manifest(manifest, "", "", "", storage) - if v1_compatible is None: - return None - - v1_id = v1_compatible.leaf_layer_v1_image_id - if v1_id is None: - return None - - return model.image.get_image(manifest_obj.repository_id, v1_id) - def find_matching_tag(self, repository_ref, tag_names): """ Finds an alive tag in the repository matching one of the given tag names and returns it or @@ -131,7 +121,7 @@ class OCIModel(RegistryDataInterface): """ found_tag = oci.tag.find_matching_tag(repository_ref._db_id, tag_names) assert found_tag is None or not found_tag.hidden - return Tag.for_tag(found_tag) + return Tag.for_tag(found_tag, self._legacy_image_id_handler) def get_most_recent_tag(self, repository_ref): """ @@ -141,27 +131,17 @@ class OCIModel(RegistryDataInterface): """ found_tag = oci.tag.get_most_recent_tag(repository_ref._db_id) assert found_tag is None or not found_tag.hidden - return Tag.for_tag(found_tag) + return Tag.for_tag(found_tag, self._legacy_image_id_handler) - def get_manifest_for_tag(self, tag, backfill_if_necessary=False, include_legacy_image=False): + def get_manifest_for_tag(self, tag): """ Returns the manifest associated with the given tag. """ assert tag is not None - - legacy_image = None - if include_legacy_image: - legacy_image = oci.shared.get_legacy_image_for_manifest(tag._manifest) - - return Manifest.for_manifest(tag._manifest, LegacyImage.for_image(legacy_image)) + return tag.manifest def lookup_manifest_by_digest( - self, - repository_ref, - manifest_digest, - allow_dead=False, - include_legacy_image=False, - require_available=False, + self, repository_ref, manifest_digest, allow_dead=False, require_available=False, ): """ Looks up the manifest with the given digest under the given repository and returns it or @@ -176,19 +156,7 @@ class OCIModel(RegistryDataInterface): if manifest is None: return None - legacy_image = None - if include_legacy_image: - try: - legacy_image_id = database.ManifestLegacyImage.get( - manifest=manifest - ).image.docker_image_id - legacy_image = self.get_legacy_image( - repository_ref, legacy_image_id, include_parents=True - ) - except database.ManifestLegacyImage.DoesNotExist: - pass - - return Manifest.for_manifest(manifest, legacy_image) + return Manifest.for_manifest(manifest, self._legacy_image_id_handler) def create_manifest_label(self, manifest, key, value, source_type_name, media_type_name=None): """ @@ -276,22 +244,15 @@ class OCIModel(RegistryDataInterface): tags = oci.tag.lookup_alive_tags_shallow(repository_ref._db_id, start_pagination_id, limit) return [ShallowTag.for_tag(tag) for tag in tags] - def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False): + def list_all_active_repository_tags(self, repository_ref): """ Returns a list of all the active tags in the repository. Note that this is a *HEAVY* operation on repositories with a lot of tags, and should only be - used for testing or where other more specific operations are not possible. + used for testing or legacy operations. 
""" tags = list(oci.tag.list_alive_tags(repository_ref._db_id)) - legacy_images_map = {} - if include_legacy_images: - legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) - - return [ - Tag.for_tag(tag, legacy_image=LegacyImage.for_image(legacy_images_map.get(tag.id))) - for tag in tags - ] + return [Tag.for_tag(tag, self._legacy_image_id_handler) for tag in tags] def list_repository_tag_history( self, @@ -312,11 +273,19 @@ class OCIModel(RegistryDataInterface): repository_ref._db_id, page, size, specific_tag_name, active_tags_only, since_time_ms ) - # TODO: do we need legacy images here? - legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) + # TODO: Remove this once the layers compressed sizes have been fully backfilled. + tags_missing_sizes = [tag for tag in tags if tag.manifest.layers_compressed_size is None] + legacy_images_map = {} + if tags_missing_sizes: + legacy_images_map = oci.tag.get_legacy_images_for_tags(tags_missing_sizes) + return ( [ - Tag.for_tag(tag, LegacyImage.for_image(legacy_images_map.get(tag.id))) + Tag.for_tag( + tag, + self._legacy_image_id_handler, + legacy_image_row=legacy_images_map.get(tag.id), + ) for tag in tags ], has_more, @@ -342,7 +311,7 @@ class OCIModel(RegistryDataInterface): return {repo_id: toSeconds(ms) for repo_id, ms in list(last_modified.items())} - def get_repo_tag(self, repository_ref, tag_name, include_legacy_image=False): + def get_repo_tag(self, repository_ref, tag_name): """ Returns the latest, *active* tag found in the repository, with the matching name or None if none. @@ -353,12 +322,7 @@ class OCIModel(RegistryDataInterface): if tag is None: return None - legacy_image = None - if include_legacy_image: - legacy_images = oci.tag.get_legacy_images_for_tags([tag]) - legacy_image = legacy_images.get(tag.id) - - return Tag.for_tag(tag, legacy_image=LegacyImage.for_image(legacy_image)) + return Tag.for_tag(tag, self._legacy_image_id_handler) def create_manifest_and_retarget_tag( self, repository_ref, manifest_interface_instance, tag_name, storage, raise_on_error=False @@ -395,9 +359,9 @@ class OCIModel(RegistryDataInterface): if tag is None: return (None, None) - legacy_image = oci.shared.get_legacy_image_for_manifest(created_manifest.manifest) - li = LegacyImage.for_image(legacy_image) - wrapped_manifest = Manifest.for_manifest(created_manifest.manifest, li) + wrapped_manifest = Manifest.for_manifest( + created_manifest.manifest, self._legacy_image_id_handler + ) # Apply any labels that should modify the created tag. if created_manifest.labels_to_apply: @@ -407,7 +371,12 @@ class OCIModel(RegistryDataInterface): # Reload the tag in case any updates were applied. tag = database.Tag.get(id=tag.id) - return (wrapped_manifest, Tag.for_tag(tag, li)) + return ( + wrapped_manifest, + Tag.for_tag( + tag, self._legacy_image_id_handler, manifest_row=created_manifest.manifest + ), + ) def retarget_tag( self, @@ -427,62 +396,37 @@ class OCIModel(RegistryDataInterface): """ with db_disallow_replica_use(): assert legacy_manifest_key is not None - manifest_id = manifest_or_legacy_image._db_id - if isinstance(manifest_or_legacy_image, LegacyImage): - # If a legacy image was required, build a new manifest for it and move the tag to that. + manifest = manifest_or_legacy_image.as_manifest() + manifest_id = manifest._db_id + + # If the manifest is a schema 1 manifest and its tag name does not match that + # specified, then we need to create a new manifest, but with that tag name. 
+ if manifest.media_type in DOCKER_SCHEMA1_CONTENT_TYPES: try: - image_row = database.Image.get(id=manifest_or_legacy_image._db_id) - except database.Image.DoesNotExist: + parsed = manifest.get_parsed_manifest() + except ManifestException: + logger.exception( + "Could not parse manifest `%s` in retarget_tag", manifest._db_id, + ) return None - manifest_instance = self._build_manifest_for_legacy_image(tag_name, image_row) - if manifest_instance is None: - return None + if parsed.tag != tag_name: + logger.debug( + "Rewriting manifest `%s` for tag named `%s`", manifest._db_id, tag_name, + ) - created = oci.manifest.get_or_create_manifest( - repository_ref._db_id, manifest_instance, storage - ) - if created is None: - return None + repository_id = repository_ref._db_id + updated = parsed.with_tag_name(tag_name, legacy_manifest_key) + assert updated.is_signed - manifest_id = created.manifest.id - else: - # If the manifest is a schema 1 manifest and its tag name does not match that - # specified, then we need to create a new manifest, but with that tag name. - if manifest_or_legacy_image.media_type in DOCKER_SCHEMA1_CONTENT_TYPES: - try: - parsed = manifest_or_legacy_image.get_parsed_manifest() - except ManifestException: - logger.exception( - "Could not parse manifest `%s` in retarget_tag", - manifest_or_legacy_image._db_id, - ) + created = oci.manifest.get_or_create_manifest(repository_id, updated, storage) + if created is None: return None - if parsed.tag != tag_name: - logger.debug( - "Rewriting manifest `%s` for tag named `%s`", - manifest_or_legacy_image._db_id, - tag_name, - ) - - repository_id = repository_ref._db_id - updated = parsed.with_tag_name(tag_name, legacy_manifest_key) - assert updated.is_signed - - created = oci.manifest.get_or_create_manifest( - repository_id, updated, storage - ) - if created is None: - return None - - manifest_id = created.manifest.id + manifest_id = created.manifest.id tag = oci.tag.retarget_tag(tag_name, manifest_id, is_reversion=is_reversion) - legacy_image = LegacyImage.for_image( - oci.shared.get_legacy_image_for_manifest(manifest_id) - ) - return Tag.for_tag(tag, legacy_image) + return Tag.for_tag(tag, self._legacy_image_id_handler) def delete_tag(self, repository_ref, tag_name): """ @@ -496,18 +440,18 @@ class OCIModel(RegistryDataInterface): msg = "Invalid repository tag '%s' on repository" % tag_name raise DataModelException(msg) - return Tag.for_tag(deleted_tag) + return Tag.for_tag(deleted_tag, self._legacy_image_id_handler) def delete_tags_for_manifest(self, manifest): """ Deletes all tags pointing to the given manifest, making the manifest inaccessible for pulling. - Returns the tags deleted, if any. Returns None on error. + Returns the tags (ShallowTag) deleted. Returns None on error. """ with db_disallow_replica_use(): deleted_tags = oci.tag.delete_tags_for_manifest(manifest._db_id) - return [Tag.for_tag(tag) for tag in deleted_tags] + return [ShallowTag.for_tag(tag) for tag in deleted_tags] def change_repository_tag_expiration(self, tag, expiration_date): """ @@ -519,75 +463,15 @@ class OCIModel(RegistryDataInterface): with db_disallow_replica_use(): return oci.tag.change_tag_expiration(tag._db_id, expiration_date) - def get_legacy_images_owned_by_tag(self, tag): - """ - Returns all legacy images *solely owned and used* by the given tag. 
- """ - tag_obj = oci.tag.get_tag_by_id(tag._db_id) - if tag_obj is None: - return None - - tags = oci.tag.list_alive_tags(tag_obj.repository_id) - legacy_images = oci.tag.get_legacy_images_for_tags(tags) - - tag_legacy_image = legacy_images.get(tag._db_id) - if tag_legacy_image is None: - return None - - assert isinstance(tag_legacy_image, Image) - - # Collect the IDs of all images that the tag uses. - tag_image_ids = set() - tag_image_ids.add(tag_legacy_image.id) - tag_image_ids.update(tag_legacy_image.ancestor_id_list()) - - # Remove any images shared by other tags. - for current in tags: - if current == tag_obj: - continue - - current_image = legacy_images.get(current.id) - if current_image is None: - continue - - tag_image_ids.discard(current_image.id) - tag_image_ids = tag_image_ids.difference(current_image.ancestor_id_list()) - if not tag_image_ids: - return [] - - if not tag_image_ids: - return [] - - # Load the images we need to return. - images = database.Image.select().where(database.Image.id << list(tag_image_ids)) - all_image_ids = set() - for image in images: - all_image_ids.add(image.id) - all_image_ids.update(image.ancestor_id_list()) - - # Build a map of all the images and their parents. - images_map = {} - all_images = database.Image.select().where(database.Image.id << list(all_image_ids)) - for image in all_images: - images_map[image.id] = image - - return [LegacyImage.for_image(image, images_map=images_map) for image in images] - def get_security_status(self, manifest_or_legacy_image): """ Returns the security status for the given manifest or legacy image or None if none. """ - image = None - - if isinstance(manifest_or_legacy_image, Manifest): - image = oci.shared.get_legacy_image_for_manifest(manifest_or_legacy_image._db_id) - if image is None: - return SecurityScanStatus.UNSUPPORTED - else: - try: - image = database.Image.get(id=manifest_or_legacy_image._db_id) - except database.Image.DoesNotExist: - return None + # TODO: change from using the Image row once we've moved all security info into MSS. + manifest_id = manifest_or_legacy_image.as_manifest()._db_id + image = oci.shared.get_legacy_image_for_manifest(manifest_id) + if image is None: + return SecurityScanStatus.UNSUPPORTED if image.security_indexed_engine is not None and image.security_indexed_engine >= 0: return ( @@ -602,22 +486,16 @@ class OCIModel(RegistryDataInterface): re-indexed. """ with db_disallow_replica_use(): - image = None + # TODO: change from using the Image row once we've moved all security info into MSS. 
+ manifest_id = manifest_or_legacy_image.as_manifest()._db_id + image = oci.shared.get_legacy_image_for_manifest(manifest_id) + if image is None: + return None - if isinstance(manifest_or_legacy_image, Manifest): - image = oci.shared.get_legacy_image_for_manifest(manifest_or_legacy_image._db_id) - if image is None: - return None - else: - try: - image = database.Image.get(id=manifest_or_legacy_image._db_id) - except database.Image.DoesNotExist: - return None - - assert image - image.security_indexed = False - image.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION - image.save() + assert image + image.security_indexed = False + image.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION + image.save() def list_manifest_layers(self, manifest, storage, include_placements=False): try: @@ -633,48 +511,9 @@ class OCIModel(RegistryDataInterface): return None return self._list_manifest_layers( - manifest_obj.repository_id, parsed, storage, include_placements, by_manifest=True + manifest_obj.repository_id, parsed, storage, include_placements ) - def lookup_derived_image( - self, manifest, verb, storage, varying_metadata=None, include_placements=False - ): - """ - Looks up the derived image for the given manifest, verb and optional varying metadata and - returns it or None if none. - """ - legacy_image = self._get_legacy_compatible_image_for_manifest(manifest, storage) - if legacy_image is None: - return None - - derived = model.image.find_derived_storage_for_image(legacy_image, verb, varying_metadata) - return self._build_derived(derived, verb, varying_metadata, include_placements) - - def lookup_or_create_derived_image( - self, - manifest, - verb, - storage_location, - storage, - varying_metadata=None, - include_placements=False, - ): - """ - Looks up the derived image for the given maniest, verb and optional varying metadata and - returns it. - - If none exists, a new derived image is created. - """ - with db_disallow_replica_use(): - legacy_image = self._get_legacy_compatible_image_for_manifest(manifest, storage) - if legacy_image is None: - return None - - derived = model.image.find_or_create_derived_storage( - legacy_image, verb, storage_location, varying_metadata - ) - return self._build_derived(derived, verb, varying_metadata, include_placements) - def set_tags_expiration_for_manifest(self, manifest, expiration_sec): """ Sets the expiration on all tags that point to the given manifest to that specified. @@ -737,9 +576,7 @@ class OCIModel(RegistryDataInterface): if created_manifest is None: return None - legacy_image = oci.shared.get_legacy_image_for_manifest(created_manifest.manifest) - li = LegacyImage.for_image(legacy_image) - return Manifest.for_manifest(created_manifest.manifest, li) + return Manifest.for_manifest(created_manifest.manifest, self._legacy_image_id_handler) def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False): """ @@ -777,11 +614,7 @@ class OCIModel(RegistryDataInterface): specified). 
""" return self._list_manifest_layers( - repository_ref._db_id, - parsed_manifest, - storage, - include_placements=include_placements, - by_manifest=True, + repository_ref._db_id, parsed_manifest, storage, include_placements=include_placements, ) def get_manifest_local_blobs(self, manifest, include_placements=False): @@ -794,25 +627,9 @@ class OCIModel(RegistryDataInterface): return None return self._get_manifest_local_blobs( - manifest, manifest_row.repository_id, include_placements, by_manifest=True + manifest, manifest_row.repository_id, include_placements ) - def yield_tags_for_vulnerability_notification(self, layer_id_pairs): - """ - Yields tags that contain one (or more) of the given layer ID pairs, in repositories which - have been registered for vulnerability_found notifications. - - Returns an iterator of LikelyVulnerableTag instances. - """ - for docker_image_id, storage_uuid in layer_id_pairs: - tags = oci.tag.lookup_notifiable_tags_for_legacy_image( - docker_image_id, storage_uuid, "vulnerability_found" - ) - for tag in tags: - yield LikelyVulnerableTag.for_tag( - tag, tag.repository, docker_image_id, storage_uuid - ) - def find_repository_with_garbage(self, limit_to_gc_policy_s): repo = model.oci.tag.find_repository_with_garbage(limit_to_gc_policy_s) if repo is None: @@ -849,66 +666,6 @@ class OCIModel(RegistryDataInterface): namespace = model.user.get_namespace_user(namespace_name) return namespace is not None and namespace.enabled - def get_derived_image_signature(self, derived_image, signer_name): - """ - Returns the signature associated with the derived image and a specific signer or None if - none. - """ - try: - derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) - except database.DerivedStorageForImage.DoesNotExist: - return None - - storage = derived_storage.derivative - signature_entry = model.storage.lookup_storage_signature(storage, signer_name) - if signature_entry is None: - return None - - return signature_entry.signature - - def set_derived_image_signature(self, derived_image, signer_name, signature): - """ - Sets the calculated signature for the given derived image and signer to that specified. - """ - with db_disallow_replica_use(): - try: - derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) - except database.DerivedStorageForImage.DoesNotExist: - return None - - storage = derived_storage.derivative - signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name) - signature_entry.signature = signature - signature_entry.uploading = False - signature_entry.save() - - def delete_derived_image(self, derived_image): - """ - Deletes a derived image and all of its storage. - """ - with db_disallow_replica_use(): - try: - derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) - except database.DerivedStorageForImage.DoesNotExist: - return None - - model.image.delete_derived_storage(derived_storage) - - def set_derived_image_size(self, derived_image, compressed_size): - """ - Sets the compressed size on the given derived image. 
- """ - with db_disallow_replica_use(): - try: - derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) - except database.DerivedStorageForImage.DoesNotExist: - return None - - storage_entry = derived_storage.derivative - storage_entry.image_size = compressed_size - storage_entry.uploading = False - storage_entry.save() - def lookup_cached_active_repository_tags( self, model_cache, repository_ref, start_pagination_id, limit ): @@ -1098,68 +855,41 @@ class OCIModel(RegistryDataInterface): ) return bool(storage) - def get_legacy_images(self, repository_ref): + def get_legacy_image(self, repository_ref, docker_image_id, storage, include_blob=False): """ - Returns an iterator of all the LegacyImage's defined in the matching repository. - """ - repo = model.repository.lookup_repository(repository_ref._db_id) - if repo is None: - return None - - all_images = model.image.get_repository_images_without_placements(repo) - all_images_map = {image.id: image for image in all_images} - - all_tags = model.oci.tag.list_alive_tags(repo) - tags_by_image_id = defaultdict(list) - for tag in all_tags: - try: - mli = database.ManifestLegacyImage.get(manifest=tag.manifest_id) - tags_by_image_id[mli.image_id].append(tag) - except database.ManifestLegacyImage.DoesNotExist: - continue - - return [ - LegacyImage.for_image(image, images_map=all_images_map, tags_map=tags_by_image_id) - for image in all_images - ] - - def get_legacy_image( - self, repository_ref, docker_image_id, include_parents=False, include_blob=False - ): - """ - Returns the matching LegacyImages under the matching repository, if any. + Returns the matching LegacyImage under the matching repository, if any. If none, returns None. """ - repo = model.repository.lookup_repository(repository_ref._db_id) - if repo is None: + retriever = RepositoryContentRetriever(repository_ref._db_id, storage) + + # Resolves the manifest and the layer index from the synthetic ID. + manifest, layer_index = self._resolve_legacy_image_id(docker_image_id) + if manifest is None: return None - image = model.image.get_image(repository_ref._db_id, docker_image_id) - if image is None: - return None + # Lookup the legacy image for the index. + legacy_image = manifest.lookup_legacy_image(layer_index, retriever) + if legacy_image is None or not include_blob: + return legacy_image - parent_images_map = None - if include_parents: - parent_images = model.image.get_parent_images( - repo.namespace_user.username, repo.name, image + # If a blob was requested, load it into the legacy image. + return legacy_image.with_blob( + self.get_repo_blob_by_digest( + repository_ref, legacy_image.blob_digest, include_placements=True ) - parent_images_map = {image.id: image for image in parent_images} + ) - blob = None - if include_blob: - placements = list(model.storage.get_storage_locations(image.storage.uuid)) - blob = Blob.for_image_storage( - image.storage, - storage_path=model.storage.get_layer_path(image.storage), - placements=placements, - ) + def populate_legacy_images_for_testing(self, manifest, storage): + """ Populates legacy images for the given manifest, for testing only. This call + will fail if called under non-testing code. 
+ """ + manifest_row = database.Manifest.get(id=manifest._db_id) + oci.manifest.populate_legacy_images_for_testing( + manifest_row, manifest.get_parsed_manifest(), storage + ) - return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob) - - def _get_manifest_local_blobs( - self, manifest, repo_id, include_placements=False, by_manifest=False - ): + def _get_manifest_local_blobs(self, manifest, repo_id, include_placements=False): parsed = manifest.get_parsed_manifest() if parsed is None: return None @@ -1168,9 +898,7 @@ class OCIModel(RegistryDataInterface): if not len(local_blob_digests): return [] - blob_query = self._lookup_repo_storages_by_content_checksum( - repo_id, local_blob_digests, by_manifest=by_manifest - ) + blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, local_blob_digests) blobs = [] for image_storage in blob_query: placements = None @@ -1186,9 +914,7 @@ class OCIModel(RegistryDataInterface): return blobs - def _list_manifest_layers( - self, repo_id, parsed, storage, include_placements=False, by_manifest=False - ): + def _list_manifest_layers(self, repo_id, parsed, storage, include_placements=False): """ Returns an *ordered list* of the layers found in the manifest, starting at the base and working towards the leaf, including the associated Blob and its placements (if specified). @@ -1206,9 +932,7 @@ class OCIModel(RegistryDataInterface): blob_digests.append(EMPTY_LAYER_BLOB_DIGEST) if blob_digests: - blob_query = self._lookup_repo_storages_by_content_checksum( - repo_id, blob_digests, by_manifest=by_manifest - ) + blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, blob_digests) storage_map = {blob.content_checksum: blob for blob in blob_query} layers = parsed.get_layers(retriever) @@ -1246,84 +970,6 @@ class OCIModel(RegistryDataInterface): return manifest_layers - def _build_derived(self, derived, verb, varying_metadata, include_placements): - if derived is None: - return None - - derived_storage = derived.derivative - placements = None - if include_placements: - placements = list(model.storage.get_storage_locations(derived_storage.uuid)) - - blob = Blob.for_image_storage( - derived_storage, - storage_path=model.storage.get_layer_path(derived_storage), - placements=placements, - ) - - return DerivedImage.for_derived_storage(derived, verb, varying_metadata, blob) - - def _build_manifest_for_legacy_image(self, tag_name, legacy_image_row): - import features - - from app import app, docker_v2_signing_key - - repo = legacy_image_row.repository - namespace_name = repo.namespace_user.username - repo_name = repo.name - - # Find the v1 metadata for this image and its parents. - try: - parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row) - except model.DataModelException: - logger.exception( - "Could not load parent images for legacy image %s", legacy_image_row.id - ) - return None - - # If the manifest is being generated under the library namespace, then we make its namespace - # empty. 
- manifest_namespace = namespace_name - if features.LIBRARY_SUPPORT and namespace_name == app.config["LIBRARY_NAMESPACE"]: - manifest_namespace = "" - - # Create and populate the manifest builder - builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag_name) - - # Add the leaf layer - builder.add_layer( - legacy_image_row.storage.content_checksum, legacy_image_row.v1_json_metadata - ) - if legacy_image_row.storage.uploading: - logger.error("Cannot add an uploading storage row: %s", legacy_image_row.storage.id) - return None - - for parent_image in parents: - if parent_image.storage.uploading: - logger.error("Cannot add an uploading storage row: %s", legacy_image_row.storage.id) - return None - - builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata) - - try: - built_manifest = builder.build(docker_v2_signing_key) - - # If the generated manifest is greater than the maximum size, regenerate it with - # intermediate metadata layers stripped down to their bare essentials. - if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE: - built_manifest = builder.with_metadata_removed().build(docker_v2_signing_key) - - if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE: - logger.error("Legacy image is too large to generate manifest") - return None - - return built_manifest - except ManifestException as me: - logger.exception( - "Got exception when trying to build manifest for legacy image %s", legacy_image_row - ) - return None - def _get_shared_storage(self, blob_digest): """ Returns an ImageStorage row for the blob digest if it is a globally shared storage. @@ -1337,7 +983,7 @@ class OCIModel(RegistryDataInterface): return None - def _lookup_repo_storages_by_content_checksum(self, repo, checksums, by_manifest=False): + def _lookup_repo_storages_by_content_checksum(self, repo, checksums): checksums = set(checksums) # Load any shared storages first. 
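The OCIModel changes in this file, together with the new data/registry_model/shared.py module added below, replace lookups against legacy Image rows with synthetic V1 image IDs: a salted Hashids encoding of a (manifest id, layer index) pair that can be decoded back to a Manifest row and a layer offset. The following standalone sketch of that round trip is illustrative only and is not part of the patch; the salt strings and manifest id are made up.

    from data.registry_model.shared import SyntheticIDHandler

    # A fixed salt keeps the synthetic IDs stable across processes; when no salt
    # is given, SyntheticIDHandler falls back to a random UUID.
    handler = SyntheticIDHandler(hash_salt="example-salt")

    # encode() turns (manifest_id, layer_index) into a 64-character ID built from
    # the hex alphabet, matching the shape clients expect of a V1 image ID.
    synthetic_id = handler.encode(1234, layer_index=0)
    assert len(synthetic_id) == 64

    # decode() recovers the pair, which _resolve_legacy_image_id_to_manifest_row()
    # uses to load the Manifest row and select the corresponding schema1 layer.
    assert handler.decode(synthetic_id) == (1234, 0)

    # An ID minted with a different salt (or an arbitrary string) decodes to an
    # empty tuple, which callers treat as "no such legacy image".
    assert SyntheticIDHandler(hash_salt="another-salt").decode(synthetic_id) == ()

Because the IDs are derived purely from the manifest row and layer index, no Image rows need to be written for new pushes, and the same manifest always yields the same V1 ID chain for a given salt.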
@@ -1350,11 +996,7 @@ class OCIModel(RegistryDataInterface): found = [] if checksums: - found = list( - model.storage.lookup_repo_storages_by_content_checksum( - repo, checksums, by_manifest=by_manifest - ) - ) + found = list(model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)) return found + extra_storages diff --git a/data/registry_model/shared.py b/data/registry_model/shared.py new file mode 100644 index 000000000..67b2821c9 --- /dev/null +++ b/data/registry_model/shared.py @@ -0,0 +1,17 @@ +import uuid + +from hashids import Hashids + + +class SyntheticIDHandler(object): + def __init__(self, hash_salt=None): + self.hash_salt = hash_salt or str(uuid.uuid4()) + self.hashids = Hashids(alphabet="0123456789abcdef", min_length=64, salt=self.hash_salt) + + def encode(self, manifest_id, layer_index=0): + encoded = self.hashids.encode(manifest_id, layer_index) + assert len(encoded) == 64 + return encoded + + def decode(self, synthetic_v1_id): + return self.hashids.decode(synthetic_v1_id) diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index 16c894e8b..f126b7c8f 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -23,7 +23,6 @@ from data.database import ( ManifestLabel, TagManifest, TagManifestLabel, - DerivedStorageForImage, Tag, TagToRepositoryTag, ImageStorageLocation, @@ -32,6 +31,7 @@ from data.cache.impl import InMemoryDataModelCache from data.registry_model.registry_oci_model import OCIModel from data.registry_model.datatypes import RepositoryReference from data.registry_model.blobuploader import upload_blob, BlobUploadSettings +from data.model.oci.retriever import RepositoryContentRetriever from data.model.blob import store_blob_record_and_temp_link from image.shared.types import ManifestImageLayer from image.docker.schema1 import ( @@ -78,7 +78,6 @@ def test_find_matching_tag(names, expected, registry_model): assert found is None else: assert found.name in expected - assert found.repository.namespace_name == "devtable" assert found.repository.name == "simple" @@ -120,13 +119,9 @@ def test_lookup_manifests(repo_namespace, repo_name, registry_model): repository_ref = RepositoryReference.for_repo_obj(repo) found_tag = registry_model.find_matching_tag(repository_ref, ["latest"]) found_manifest = registry_model.get_manifest_for_tag(found_tag) - found = registry_model.lookup_manifest_by_digest( - repository_ref, found_manifest.digest, include_legacy_image=True - ) + found = registry_model.lookup_manifest_by_digest(repository_ref, found_manifest.digest) assert found._db_id == found_manifest._db_id assert found.digest == found_manifest.digest - assert found.legacy_image - assert found.legacy_image.parents schema1_parsed = registry_model.get_schema1_parsed_manifest(found, "foo", "bar", "baz", storage) assert schema1_parsed is not None @@ -211,26 +206,24 @@ def test_batch_labels(registry_model): ) def test_repository_tags(repo_namespace, repo_name, registry_model): repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) - tags = registry_model.list_all_active_repository_tags( - repository_ref, include_legacy_images=True - ) + tags = registry_model.list_all_active_repository_tags(repository_ref) assert len(tags) tags_map = registry_model.get_legacy_tags_map(repository_ref, storage) for tag in tags: - found_tag = registry_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) + found_tag = registry_model.get_repo_tag(repository_ref, 
tag.name) assert found_tag == tag - if found_tag.legacy_image is None: - continue - + retriever = RepositoryContentRetriever(repository_ref.id, storage) + legacy_image = tag.manifest.lookup_legacy_image(0, retriever) found_image = registry_model.get_legacy_image( - repository_ref, found_tag.legacy_image.docker_image_id + repository_ref, found_tag.manifest.legacy_image_root_id, storage ) - assert found_image == found_tag.legacy_image - assert tag.name in tags_map - assert tags_map[tag.name] == found_image.docker_image_id + + if found_image is not None: + assert found_image.docker_image_id == legacy_image.docker_image_id + assert tags_map[tag.name] == found_image.docker_image_id @pytest.mark.parametrize( @@ -242,12 +235,19 @@ def test_repository_tags(repo_namespace, repo_name, registry_model): ("public", "publicrepo", 1, False), ], ) -def test_repository_tag_history(namespace, name, expected_tag_count, has_expired, registry_model): +@pytest.mark.parametrize("with_size_fallback", [False, True,]) +def test_repository_tag_history( + namespace, name, expected_tag_count, has_expired, registry_model, with_size_fallback +): # Pre-cache media type loads to ensure consistent query count. Manifest.media_type.get_name(1) + # If size fallback is requested, delete the sizes on the manifest rows. + if with_size_fallback: + Manifest.update(layers_compressed_size=None).execute() + repository_ref = registry_model.lookup_repository(namespace, name) - with assert_query_count(2): + with assert_query_count(2 if with_size_fallback else 1): history, has_more = registry_model.list_repository_tag_history(repository_ref) assert not has_more assert len(history) == expected_tag_count @@ -323,9 +323,7 @@ def test_delete_tags(repo_namespace, repo_name, via_manifest, registry_model): # Make sure the tag is no longer found. with assert_query_count(1): - found_tag = registry_model.get_repo_tag( - repository_ref, tag.name, include_legacy_image=True - ) + found_tag = registry_model.get_repo_tag(repository_ref, tag.name) assert found_tag is None # Ensure all tags have been deleted. @@ -347,7 +345,9 @@ def test_retarget_tag_history(use_manifest, registry_model): repository_ref, history[0].manifest_digest, allow_dead=True ) else: - manifest_or_legacy_image = history[0].legacy_image + manifest_or_legacy_image = registry_model.get_legacy_image( + repository_ref, history[0].manifest.legacy_image_root_id, storage + ) # Retarget the tag. assert manifest_or_legacy_image @@ -364,7 +364,7 @@ def test_retarget_tag_history(use_manifest, registry_model): if use_manifest: assert updated_tag.manifest_digest == manifest_or_legacy_image.digest else: - assert updated_tag.legacy_image == manifest_or_legacy_image + assert updated_tag.manifest.legacy_image_root_id == manifest_or_legacy_image.docker_image_id # Ensure history has been updated. 
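For illustration only (not part of this patch): the tests above resolve a tag's legacy image through its manifest rather than through the tag itself. A minimal sketch of that pattern; the helper name resolve_root_legacy_image is purely illustrative:

    from data.model.oci.retriever import RepositoryContentRetriever

    def resolve_root_legacy_image(registry_model, repository_ref, tag, storage):
        # The retriever lets the manifest load its config/layer content on demand.
        retriever = RepositoryContentRetriever(repository_ref.id, storage)

        # Legacy image view at layer index 0; per the tests above this matches
        # the manifest's legacy_image_root_id.
        legacy_image = tag.manifest.lookup_legacy_image(0, retriever)

        # Equivalent lookup through the registry model; may return None when no
        # legacy image can be synthesized for the manifest.
        found = registry_model.get_legacy_image(
            repository_ref, tag.manifest.legacy_image_root_id, storage
        )
        return legacy_image, found
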
new_history, _ = registry_model.list_repository_tag_history(repository_ref) @@ -388,15 +388,17 @@ def test_change_repository_tag_expiration(registry_model): def test_get_security_status(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - tags = registry_model.list_all_active_repository_tags( - repository_ref, include_legacy_images=True - ) + tags = registry_model.list_all_active_repository_tags(repository_ref) assert len(tags) for tag in tags: - assert registry_model.get_security_status(tag.legacy_image) - registry_model.reset_security_status(tag.legacy_image) - assert registry_model.get_security_status(tag.legacy_image) + legacy_image = registry_model.get_legacy_image( + repository_ref, tag.manifest.legacy_image_root_id, storage + ) + assert legacy_image + assert registry_model.get_security_status(legacy_image) + registry_model.reset_security_status(legacy_image) + assert registry_model.get_security_status(legacy_image) @pytest.fixture() @@ -504,145 +506,6 @@ def test_manifest_remote_layers(oci_model): assert layers[0].blob is None -def test_derived_image(registry_model): - # Clear all existing derived storage. - DerivedStorageForImage.delete().execute() - - repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest") - manifest = registry_model.get_manifest_for_tag(tag) - - # Ensure the squashed image doesn't exist. - assert registry_model.lookup_derived_image(manifest, "squash", storage, {}) is None - - # Create a new one. - squashed = registry_model.lookup_or_create_derived_image( - manifest, "squash", "local_us", storage, {} - ) - assert ( - registry_model.lookup_or_create_derived_image(manifest, "squash", "local_us", storage, {}) - == squashed - ) - assert squashed.unique_id - - # Check and set the size. - assert squashed.blob.compressed_size is None - registry_model.set_derived_image_size(squashed, 1234) - - found = registry_model.lookup_derived_image(manifest, "squash", storage, {}) - assert found.blob.compressed_size == 1234 - assert found.unique_id == squashed.unique_id - - # Ensure its returned now. - assert found == squashed - - # Ensure different metadata results in a different derived image. - found = registry_model.lookup_derived_image(manifest, "squash", storage, {"foo": "bar"}) - assert found is None - - squashed_foo = registry_model.lookup_or_create_derived_image( - manifest, "squash", "local_us", storage, {"foo": "bar"} - ) - assert squashed_foo != squashed - - found = registry_model.lookup_derived_image(manifest, "squash", storage, {"foo": "bar"}) - assert found == squashed_foo - - assert squashed.unique_id != squashed_foo.unique_id - - # Lookup with placements. - squashed = registry_model.lookup_or_create_derived_image( - manifest, "squash", "local_us", storage, {}, include_placements=True - ) - assert squashed.blob.placements - - # Delete the derived image. 
- registry_model.delete_derived_image(squashed) - assert registry_model.lookup_derived_image(manifest, "squash", storage, {}) is None - - -def test_derived_image_signatures(registry_model): - repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest") - manifest = registry_model.get_manifest_for_tag(tag) - - derived = registry_model.lookup_or_create_derived_image( - manifest, "squash", "local_us", storage, {} - ) - assert derived - - registry_model.set_derived_image_signature(derived, "gpg2", "foo") - assert registry_model.get_derived_image_signature(derived, "gpg2") == "foo" - - -@pytest.mark.parametrize( - "manifest_builder, list_builder", - [ - (DockerSchema2ManifestBuilder, DockerSchema2ManifestListBuilder), - (OCIManifestBuilder, OCIIndexBuilder), - ], -) -def test_derived_image_for_manifest_list(manifest_builder, list_builder, oci_model): - # Clear all existing derived storage. - DerivedStorageForImage.delete().execute() - - # Create a config blob for testing. - config_json = json.dumps( - { - "config": {}, - "architecture": "amd64", - "os": "linux", - "rootfs": {"type": "layers", "diff_ids": []}, - "history": [ - {"created": "2018-04-03T18:37:09.284840891Z", "created_by": "do something",}, - ], - } - ) - - app_config = {"TESTING": True} - repository_ref = oci_model.lookup_repository("devtable", "simple") - with upload_blob(repository_ref, storage, BlobUploadSettings(500, 500)) as upload: - upload.upload_chunk(app_config, BytesIO(config_json.encode("utf-8"))) - blob = upload.commit_to_blob(app_config) - - # Create the manifest in the repo. - builder = manifest_builder() - builder.set_config_digest(blob.digest, blob.compressed_size) - builder.add_layer(blob.digest, blob.compressed_size) - amd64_manifest = builder.build() - - oci_model.create_manifest_and_retarget_tag( - repository_ref, amd64_manifest, "submanifest", storage, raise_on_error=True - ) - - # Create a manifest list, pointing to at least one amd64+linux manifest. - builder = list_builder() - builder.add_manifest(amd64_manifest, "amd64", "linux") - manifestlist = builder.build() - - oci_model.create_manifest_and_retarget_tag( - repository_ref, manifestlist, "listtag", storage, raise_on_error=True - ) - - manifest = oci_model.get_manifest_for_tag(oci_model.get_repo_tag(repository_ref, "listtag")) - assert manifest - assert manifest.get_parsed_manifest().is_manifest_list - - # Ensure the squashed image doesn't exist. - assert oci_model.lookup_derived_image(manifest, "squash", storage, {}) is None - - # Create a new one. - squashed = oci_model.lookup_or_create_derived_image(manifest, "squash", "local_us", storage, {}) - assert squashed.unique_id - assert ( - oci_model.lookup_or_create_derived_image(manifest, "squash", "local_us", storage, {}) - == squashed - ) - - # Perform lookup. 
- assert oci_model.lookup_derived_image(manifest, "squash", storage, {}) == squashed - - def test_blob_uploads(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") @@ -763,13 +626,11 @@ def test_get_cached_repo_blob(registry_model): def test_create_manifest_and_retarget_tag(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - latest_tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repository_ref, "latest") manifest = registry_model.get_manifest_for_tag(latest_tag).get_parsed_manifest() builder = DockerSchema1ManifestBuilder("devtable", "simple", "anothertag") - builder.add_layer( - manifest.blob_digests[0], '{"id": "%s"}' % latest_tag.legacy_image.docker_image_id - ) + builder.add_layer(manifest.blob_digests[0], '{"id": "%s"}' % "someid") sample_manifest = builder.build(docker_v2_signing_key) assert sample_manifest is not None @@ -785,14 +646,14 @@ def test_create_manifest_and_retarget_tag(registry_model): def test_get_schema1_parsed_manifest(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - latest_tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repository_ref, "latest") manifest = registry_model.get_manifest_for_tag(latest_tag) assert registry_model.get_schema1_parsed_manifest(manifest, "", "", "", storage) def test_convert_manifest(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - latest_tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repository_ref, "latest") manifest = registry_model.get_manifest_for_tag(latest_tag) mediatypes = DOCKER_SCHEMA1_CONTENT_TYPES @@ -804,11 +665,11 @@ def test_convert_manifest(registry_model): def test_create_manifest_and_retarget_tag_with_labels(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - latest_tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repository_ref, "latest") manifest = registry_model.get_manifest_for_tag(latest_tag).get_parsed_manifest() json_metadata = { - "id": latest_tag.legacy_image.docker_image_id, + "id": "someid", "config": {"Labels": {"quay.expires-after": "2w",},}, } @@ -903,7 +764,8 @@ def test_unicode_emoji(registry_model): assert found.get_parsed_manifest().digest == manifest.digest -def test_lookup_active_repository_tags(oci_model): +@pytest.mark.parametrize("test_cached", [False, True,]) +def test_lookup_active_repository_tags(test_cached, oci_model): repository_ref = oci_model.lookup_repository("devtable", "simple") latest_tag = oci_model.get_repo_tag(repository_ref, "latest") manifest = oci_model.get_manifest_for_tag(latest_tag) @@ -924,7 +786,14 @@ def test_lookup_active_repository_tags(oci_model): tags_found = set() tag_id = None while True: - tags = oci_model.lookup_active_repository_tags(repository_ref, tag_id, 11) + if test_cached: + model_cache = InMemoryDataModelCache() + tags = oci_model.lookup_cached_active_repository_tags( + model_cache, repository_ref, tag_id, 11 + ) + else: + tags = oci_model.lookup_active_repository_tags(repository_ref, tag_id, 11) + assert len(tags) <= 11 for tag in tags[0:10]: assert tag.name not in tags_found @@ -942,49 +811,27 @@ def 
test_lookup_active_repository_tags(oci_model): assert not tags_expected -def test_yield_tags_for_vulnerability_notification(registry_model): - repository_ref = registry_model.lookup_repository("devtable", "complex") - - # Check for all legacy images under the tags and ensure not raised because - # no notification is yet registered. - for tag in registry_model.list_all_active_repository_tags( - repository_ref, include_legacy_images=True - ): - image = registry_model.get_legacy_image( - repository_ref, tag.legacy_image.docker_image_id, include_blob=True - ) - pairs = [(image.docker_image_id, image.blob.uuid)] - results = list(registry_model.yield_tags_for_vulnerability_notification(pairs)) - assert not len(results) - - # Register a notification. - model.notification.create_repo_notification( - repository_ref.id, "vulnerability_found", "email", {}, {} +def test_create_manifest_with_temp_tag(initialized_db, registry_model): + builder = DockerSchema1ManifestBuilder("devtable", "simple", "latest") + builder.add_layer( + "sha256:abcde", json.dumps({"id": "someid", "author": "some user",}, ensure_ascii=False) ) - # Check again. - for tag in registry_model.list_all_active_repository_tags( - repository_ref, include_legacy_images=True - ): - image = registry_model.get_legacy_image( - repository_ref, - tag.legacy_image.docker_image_id, - include_blob=True, - include_parents=True, - ) + manifest = builder.build(ensure_ascii=False) - # Check for every parent of the image. - for current in image.parents: - img = registry_model.get_legacy_image( - repository_ref, current.docker_image_id, include_blob=True - ) - pairs = [(img.docker_image_id, img.blob.uuid)] - results = list(registry_model.yield_tags_for_vulnerability_notification(pairs)) - assert len(results) > 0 - assert tag.name in {t.name for t in results} + for blob_digest in manifest.local_blob_digests: + _populate_blob(blob_digest) - # Check for the image itself. - pairs = [(image.docker_image_id, image.blob.uuid)] - results = list(registry_model.yield_tags_for_vulnerability_notification(pairs)) - assert len(results) > 0 - assert tag.name in {t.name for t in results} + # Create the manifest in the database. + repository_ref = registry_model.lookup_repository("devtable", "simple") + created = registry_model.create_manifest_with_temp_tag(repository_ref, manifest, 300, storage) + assert created.digest == manifest.digest + + # Ensure it cannot be found normally, since it is simply temp-tagged. + assert registry_model.lookup_manifest_by_digest(repository_ref, manifest.digest) is None + + # Ensure it can be found, which means it is temp-tagged. + found = registry_model.lookup_manifest_by_digest( + repository_ref, manifest.digest, allow_dead=True + ) + assert found is not None diff --git a/data/registry_model/test/test_manifestbuilder.py b/data/registry_model/test/test_manifestbuilder.py index b5a4ffa8b..8a65ce3a6 100644 --- a/data/registry_model/test/test_manifestbuilder.py +++ b/data/registry_model/test/test_manifestbuilder.py @@ -82,10 +82,9 @@ def test_build_manifest(layers, fake_session, registry_model): builder.done() # Verify the legacy image for the tag. - found = registry_model.get_repo_tag(repository_ref, "somenewtag", include_legacy_image=True) + found = registry_model.get_repo_tag(repository_ref, "somenewtag") assert found assert found.name == "somenewtag" - assert found.legacy_image.docker_image_id == layers[-1][0] # Verify the blob and manifest. 
manifest = registry_model.get_manifest_for_tag(found) diff --git a/data/registry_model/test/test_model_shared.py b/data/registry_model/test/test_model_shared.py new file mode 100644 index 000000000..fbaeda372 --- /dev/null +++ b/data/registry_model/test/test_model_shared.py @@ -0,0 +1,19 @@ +import pytest + +from data.registry_model.shared import SyntheticIDHandler + + +@pytest.mark.parametrize("manifest_id", [1, 1000, 10000, 60000]) +@pytest.mark.parametrize("hash_salt", [None, "", "testing1234", "foobarbaz",]) +def test_handler(manifest_id, hash_salt): + handler = SyntheticIDHandler(hash_salt) + for index in range(0, 10): + assert handler.decode(handler.encode(manifest_id, layer_index=index)) == ( + manifest_id, + index, + ) + + +def test_invalid_value(): + handler = SyntheticIDHandler("somehash") + assert handler.decode("invalidvalue") == () diff --git a/data/secscan_model/__init__.py b/data/secscan_model/__init__.py index a5509ecf1..597896135 100644 --- a/data/secscan_model/__init__.py +++ b/data/secscan_model/__init__.py @@ -3,8 +3,13 @@ import logging from collections import namedtuple from data.secscan_model.secscan_v2_model import V2SecurityScanner, NoopV2SecurityScanner -from data.secscan_model.secscan_v4_model import V4SecurityScanner, NoopV4SecurityScanner +from data.secscan_model.secscan_v4_model import ( + V4SecurityScanner, + NoopV4SecurityScanner, + ScanToken as V4ScanToken, +) from data.secscan_model.interface import SecurityScannerInterface, InvalidConfigurationException +from data.secscan_model.datatypes import SecurityInformationLookupResult, ScanLookupStatus from data.database import Manifest from data.registry_model.datatypes import Manifest as ManifestDataType @@ -12,68 +17,52 @@ from data.registry_model.datatypes import Manifest as ManifestDataType logger = logging.getLogger(__name__) -SplitScanToken = namedtuple("NextScanToken", ["version", "token"]) - - class SecurityScannerModelProxy(SecurityScannerInterface): def configure(self, app, instance_keys, storage): - # TODO(alecmerdler): Just use `V4SecurityScanner` once Clair V2 is removed. 
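For reference (illustration only, not part of this patch): the SyntheticIDHandler added in data/registry_model/shared.py above round-trips a (manifest_id, layer_index) pair through Hashids to mint the 64-character synthetic V1 image IDs, salted so the IDs stay stable but opaque. A minimal usage sketch, assuming the hashids package is available; the salt literal below is a stand-in for whatever the application configures:

    from data.registry_model.shared import SyntheticIDHandler

    # "example-salt" is a placeholder; the real salt comes from application config.
    handler = SyntheticIDHandler(hash_salt="example-salt")

    synthetic_v1_id = handler.encode(1234, layer_index=2)
    assert len(synthetic_v1_id) == 64              # always a 64-char hex-alphabet string
    assert handler.decode(synthetic_v1_id) == (1234, 2)
    assert handler.decode("invalidvalue") == ()    # unknown IDs decode to an empty tuple
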
try: - self._model = V2SecurityScanner(app, instance_keys, storage) + self._model = V4SecurityScanner(app, instance_keys, storage) except InvalidConfigurationException: - self._model = NoopV2SecurityScanner() + self._model = NoopV4SecurityScanner() try: - self._v4_model = V4SecurityScanner(app, instance_keys, storage) + self._legacy_model = V2SecurityScanner(app, instance_keys, storage) except InvalidConfigurationException: - self._v4_model = NoopV4SecurityScanner() - - self._v4_namespace_whitelist = app.config.get("SECURITY_SCANNER_V4_NAMESPACE_WHITELIST", []) + self._legacy_model = NoopV2SecurityScanner() logger.info("===============================") - logger.info("Using split secscan model: `%s`", [self._model, self._v4_model]) - logger.info("v4 whitelist `%s`", self._v4_namespace_whitelist) + logger.info("Using split secscan model: `%s`", [self._legacy_model, self._model]) logger.info("===============================") return self def perform_indexing(self, next_token=None): - if next_token is None: - return SplitScanToken("v4", self._v4_model.perform_indexing(None)) + if next_token is not None: + assert isinstance(next_token, V4ScanToken) + assert isinstance(next_token.min_id, int) - if next_token.version == "v4" and next_token.token is not None: - return SplitScanToken("v4", self._v4_model.perform_indexing(next_token.token)) - - if next_token.version == "v4" and next_token.token is None: - return SplitScanToken("v2", self._model.perform_indexing(None)) - - if next_token.version == "v2" and next_token.token is not None: - return SplitScanToken("v2", self._model.perform_indexing(next_token.token)) - - if next_token.version == "v2" and next_token.token is None: - return None + return self._model.perform_indexing(next_token) def load_security_information(self, manifest_or_legacy_image, include_vulnerabilities): - if isinstance(manifest_or_legacy_image, ManifestDataType): - namespace = Manifest.get( - manifest_or_legacy_image._db_id - ).repository.namespace_user.username + manifest = manifest_or_legacy_image.as_manifest() - if namespace in self._v4_namespace_whitelist: - return self._v4_model.load_security_information( - manifest_or_legacy_image, include_vulnerabilities - ) + info = self._model.load_security_information(manifest, include_vulnerabilities) + if info.status != ScanLookupStatus.NOT_YET_INDEXED: + return info - return self._model.load_security_information( + legacy_info = self._legacy_model.load_security_information( manifest_or_legacy_image, include_vulnerabilities ) + if legacy_info.status != ScanLookupStatus.UNSUPPORTED_FOR_INDEXING: + return legacy_info + + return SecurityInformationLookupResult.with_status(ScanLookupStatus.NOT_YET_INDEXED) def register_model_cleanup_callbacks(self, data_model_config): return self._model.register_model_cleanup_callbacks(data_model_config) @property def legacy_api_handler(self): - return self._model.legacy_api_handler + return self._legacy_model.legacy_api_handler secscan_model = SecurityScannerModelProxy() diff --git a/data/secscan_model/secscan_v2_model.py b/data/secscan_model/secscan_v2_model.py index 94ff8efa6..07c92a6bd 100644 --- a/data/secscan_model/secscan_v2_model.py +++ b/data/secscan_model/secscan_v2_model.py @@ -1,13 +1,10 @@ import logging from collections import namedtuple -from math import log10 from prometheus_client import Gauge from deprecated import deprecated -from data.database import UseThenDisconnect - from data.secscan_model.interface import SecurityScannerInterface, InvalidConfigurationException from 
data.secscan_model.datatypes import ( ScanLookupStatus, @@ -21,14 +18,6 @@ from data.secscan_model.datatypes import ( from data.registry_model import registry_model from data.registry_model.datatypes import SecurityScanStatus -from data.model.image import ( - get_images_eligible_for_scan, - get_image_pk_field, - get_max_id_for_sec_scan, - get_min_id_for_sec_scan, -) - -from util.migrate.allocator import yield_random_entries from util.config import URLSchemeAndHostname from util.secscan.api import V2SecurityConfigValidator, SecurityScannerAPI, APIRequestFailure from util.secscan.secscan_util import get_blob_download_uri_getter @@ -111,12 +100,8 @@ class V2SecurityScanner(SecurityScannerInterface): instance_keys=instance_keys, ) - # NOTE: This import is in here because otherwise this class would depend upon app. - # Its not great, but as this is intended to be legacy until its removed, its okay. - from util.secscan.analyzer import LayerAnalyzer - - self._target_version = app.config.get("SECURITY_SCANNER_ENGINE_VERSION_TARGET", 3) - self._analyzer = LayerAnalyzer(app.config, self._legacy_secscan_api) + def register_model_cleanup_callbacks(self, data_model_config): + pass @property def legacy_api_handler(self): @@ -125,12 +110,6 @@ class V2SecurityScanner(SecurityScannerInterface): """ return self._legacy_secscan_api - def register_model_cleanup_callbacks(self, data_model_config): - if self._legacy_secscan_api is not None: - data_model_config.register_image_cleanup_callback( - self._legacy_secscan_api.cleanup_layers - ) - def load_security_information(self, manifest_or_legacy_image, include_vulnerabilities=False): status = registry_model.get_security_status(manifest_or_legacy_image) if status is None: @@ -164,80 +143,13 @@ class V2SecurityScanner(SecurityScannerInterface): return SecurityInformationLookupResult.for_request_error(str(arf)) if data is None: - # If no data was found but we reached this point, then it indicates we have incorrect security - # status for the manifest or legacy image. Mark the manifest or legacy image as unindexed - # so it automatically gets re-indexed. - if self.app.config.get("REGISTRY_STATE", "normal") == "normal": - registry_model.reset_security_status(manifest_or_legacy_image) - return SecurityInformationLookupResult.with_status(ScanLookupStatus.NOT_YET_INDEXED) return SecurityInformationLookupResult.for_data(SecurityInformation.from_dict(data)) - def _candidates_to_scan(self, start_token=None): - target_version = self._target_version - - def batch_query(): - return get_images_eligible_for_scan(target_version) - - # Find the minimum ID. - min_id = None - if start_token is not None: - min_id = start_token.min_id - else: - min_id = self.app.config.get("SECURITY_SCANNER_INDEXING_MIN_ID") - if min_id is None: - min_id = get_min_id_for_sec_scan(target_version) - - # Get the ID of the last image we can analyze. Will be None if there are no images in the - # database. - max_id = get_max_id_for_sec_scan() - if max_id is None: - return (None, None) - - if min_id is None or min_id > max_id: - return (None, None) - - # 4^log10(total) gives us a scalable batch size into the billions. - batch_size = int(4 ** log10(max(10, max_id - min_id))) - - # TODO: Once we have a clean shared NamedTuple for Images, send that to the secscan analyzer - # rather than the database Image itself. 
- iterator = yield_random_entries( - batch_query, get_image_pk_field(), batch_size, max_id, min_id, - ) - - return (iterator, ScanToken(max_id + 1)) - def perform_indexing(self, start_token=None): """ Performs indexing of the next set of unindexed manifests/images. - - If start_token is given, the indexing should resume from that point. Returns a new start - index for the next iteration of indexing. The tokens returned and given are assumed to be - opaque outside of this implementation and should not be relied upon by the caller to conform - to any particular format. + NOTE: Raises `NotImplementedError` because indexing for v2 is not supported. """ - # NOTE: This import is in here because otherwise this class would depend upon app. - # Its not great, but as this is intended to be legacy until its removed, its okay. - from util.secscan.analyzer import PreemptedException - - iterator, next_token = self._candidates_to_scan(start_token) - if iterator is None: - logger.debug("Found no additional images to scan") - return None - - with UseThenDisconnect(self.app.config): - for candidate, abt, num_remaining in iterator: - try: - self._analyzer.analyze_recursively(candidate) - except PreemptedException: - logger.debug("Another worker pre-empted us for layer: %s", candidate.id) - abt.set() - except APIRequestFailure: - logger.exception("Security scanner service unavailable") - return - - unscanned_images.set(num_remaining) - - return next_token + raise NotImplementedError("Unsupported for this security scanner version") diff --git a/data/secscan_model/secscan_v4_model.py b/data/secscan_model/secscan_v4_model.py index adfa79edf..cc1d67fc1 100644 --- a/data/secscan_model/secscan_v4_model.py +++ b/data/secscan_model/secscan_v4_model.py @@ -148,19 +148,11 @@ class V4SecurityScanner(SecurityScannerInterface): ) def perform_indexing(self, start_token=None): - whitelisted_namespaces = self.app.config.get("SECURITY_SCANNER_V4_NAMESPACE_WHITELIST", []) try: indexer_state = self._secscan_api.state() except APIRequestFailure: return None - def eligible_manifests(base_query): - return ( - base_query.join(Repository) - .join(User) - .where(User.username << whitelisted_namespaces) - ) - min_id = ( start_token.min_id if start_token is not None @@ -178,16 +170,14 @@ class V4SecurityScanner(SecurityScannerInterface): # TODO(alecmerdler): Filter out any `Manifests` that are still being uploaded def not_indexed_query(): return ( - eligible_manifests(Manifest.select()) - .switch(Manifest) + Manifest.select() .join(ManifestSecurityStatus, JOIN.LEFT_OUTER) .where(ManifestSecurityStatus.id >> None) ) def index_error_query(): return ( - eligible_manifests(Manifest.select()) - .switch(Manifest) + Manifest.select() .join(ManifestSecurityStatus) .where( ManifestSecurityStatus.index_status == IndexStatus.FAILED, @@ -197,8 +187,7 @@ class V4SecurityScanner(SecurityScannerInterface): def needs_reindexing_query(indexer_hash): return ( - eligible_manifests(Manifest.select()) - .switch(Manifest) + Manifest.select() .join(ManifestSecurityStatus) .where( ManifestSecurityStatus.indexer_hash != indexer_hash, @@ -209,6 +198,7 @@ class V4SecurityScanner(SecurityScannerInterface): # 4^log10(total) gives us a scalable batch size into the billions. batch_size = int(4 ** log10(max(10, max_id - min_id))) + # TODO(alecmerdler): We want to index newer manifests first, while backfilling older manifests... 
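As a quick numeric check on the batch-size heuristic retained here, int(4 ** log10(max(10, max_id - min_id))) grows slowly with the ID spread, so indexing batches stay small even for very large Manifest tables. A short sketch (output values are approximate):

    from math import log10

    def batch_size(min_id, max_id):
        # Same formula as in perform_indexing: 4 ** log10(spread), floored at a spread of 10.
        return int(4 ** log10(max(10, max_id - min_id)))

    for spread in (10, 1_000, 1_000_000, 1_000_000_000):
        print(spread, batch_size(0, spread))
    # roughly: 10 -> 4, 1_000 -> 64, 1_000_000 -> 4_096, 1_000_000_000 -> 262_000
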
iterator = itertools.chain( yield_random_entries(not_indexed_query, Manifest.id, batch_size, max_id, min_id,), yield_random_entries(index_error_query, Manifest.id, batch_size, max_id, min_id,), diff --git a/data/secscan_model/test/test_secscan_interface.py b/data/secscan_model/test/test_secscan_interface.py index 212f0d0a0..cab596683 100644 --- a/data/secscan_model/test/test_secscan_interface.py +++ b/data/secscan_model/test/test_secscan_interface.py @@ -1,4 +1,5 @@ import pytest + from mock import patch, Mock from data.secscan_model.datatypes import ScanLookupStatus, SecurityInformationLookupResult @@ -8,8 +9,10 @@ from data.secscan_model.secscan_v4_model import ( IndexReportState, ScanToken as V4ScanToken, ) -from data.secscan_model import secscan_model, SplitScanToken +from data.secscan_model import secscan_model from data.registry_model import registry_model +from data.model.oci import shared +from data.database import ManifestSecurityStatus, IndexerVersion, IndexStatus, ManifestLegacyImage from test.fixtures import * @@ -17,84 +20,62 @@ from app import app, instance_keys, storage @pytest.mark.parametrize( - "repository, v4_whitelist", - [(("devtable", "complex"), []), (("devtable", "complex"), ["devtable"]),], + "indexed_v2, indexed_v4, expected_status", + [ + (False, False, ScanLookupStatus.NOT_YET_INDEXED), + (False, True, ScanLookupStatus.UNSUPPORTED_FOR_INDEXING), + (True, False, ScanLookupStatus.FAILED_TO_INDEX), + (True, True, ScanLookupStatus.UNSUPPORTED_FOR_INDEXING), + ], ) -def test_load_security_information_v2_only(repository, v4_whitelist, initialized_db): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = v4_whitelist - +def test_load_security_information(indexed_v2, indexed_v4, expected_status, initialized_db): secscan_model.configure(app, instance_keys, storage) - repo = registry_model.lookup_repository(*repository) - for tag in registry_model.list_all_active_repository_tags(repo): - manifest = registry_model.get_manifest_for_tag(tag) - assert manifest + repository_ref = registry_model.lookup_repository("devtable", "simple") + tag = registry_model.find_matching_tag(repository_ref, ["latest"]) + manifest = registry_model.get_manifest_for_tag(tag) + assert manifest - result = secscan_model.load_security_information(manifest, True) - assert isinstance(result, SecurityInformationLookupResult) - assert result.status == ScanLookupStatus.NOT_YET_INDEXED + registry_model.populate_legacy_images_for_testing(manifest, storage) + + image = shared.get_legacy_image_for_manifest(manifest._db_id) + + if indexed_v2: + image.security_indexed = False + image.security_indexed_engine = 3 + image.save() + else: + ManifestLegacyImage.delete().where( + ManifestLegacyImage.manifest == manifest._db_id + ).execute() + + if indexed_v4: + ManifestSecurityStatus.create( + manifest=manifest._db_id, + repository=repository_ref._db_id, + error_json={}, + index_status=IndexStatus.MANIFEST_UNSUPPORTED, + indexer_hash="abc", + indexer_version=IndexerVersion.V4, + metadata_json={}, + ) + + result = secscan_model.load_security_information(manifest, True) + + assert isinstance(result, SecurityInformationLookupResult) + assert result.status == expected_status @pytest.mark.parametrize( - "repository, v4_whitelist", + "next_token, expected_next_token, expected_error", [ - (("devtable", "complex"), []), - (("devtable", "complex"), ["devtable"]), - (("buynlarge", "orgrepo"), ["devtable"]), - (("buynlarge", "orgrepo"), ["devtable", "buynlarge"]), - (("buynlarge", "orgrepo"), ["devtable", "buynlarge", 
"sellnsmall"]), + (None, V4ScanToken(56), None), + (V4ScanToken(None), V4ScanToken(56), AssertionError), + (V4ScanToken(1), V4ScanToken(56), None), + (V2ScanToken(158), V4ScanToken(56), AssertionError), ], ) -def test_load_security_information(repository, v4_whitelist, initialized_db): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = v4_whitelist - app.config["SECURITY_SCANNER_V4_ENDPOINT"] = "http://clairv4:6060" - secscan_api = Mock() - - with patch("data.secscan_model.secscan_v4_model.ClairSecurityScannerAPI", secscan_api): - secscan_model.configure(app, instance_keys, storage) - - repo = registry_model.lookup_repository(*repository) - for tag in registry_model.list_all_active_repository_tags(repo): - manifest = registry_model.get_manifest_for_tag(tag) - assert manifest - - result = secscan_model.load_security_information(manifest, True) - assert isinstance(result, SecurityInformationLookupResult) - assert result.status == ScanLookupStatus.NOT_YET_INDEXED - - -@pytest.mark.parametrize( - "next_token, expected_next_token", - [ - (None, SplitScanToken("v4", None)), - (SplitScanToken("v4", V4ScanToken(1)), SplitScanToken("v4", None)), - (SplitScanToken("v4", None), SplitScanToken("v2", V2ScanToken(318))), - (SplitScanToken("v2", V2ScanToken(318)), SplitScanToken("v2", None)), - (SplitScanToken("v2", None), None), - ], -) -def test_perform_indexing_v2_only(next_token, expected_next_token, initialized_db): - def layer_analyzer(*args, **kwargs): - return Mock() - - with patch("util.secscan.analyzer.LayerAnalyzer", layer_analyzer): - secscan_model.configure(app, instance_keys, storage) - - assert secscan_model.perform_indexing(next_token) == expected_next_token - - -@pytest.mark.parametrize( - "next_token, expected_next_token", - [ - (None, SplitScanToken("v4", V4ScanToken(56))), - (SplitScanToken("v4", V4ScanToken(1)), SplitScanToken("v4", V4ScanToken(56))), - (SplitScanToken("v4", None), SplitScanToken("v2", V2ScanToken(318))), - (SplitScanToken("v2", V2ScanToken(318)), SplitScanToken("v2", None)), - (SplitScanToken("v2", None), None), - ], -) -def test_perform_indexing(next_token, expected_next_token, initialized_db): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] +def test_perform_indexing(next_token, expected_next_token, expected_error, initialized_db): app.config["SECURITY_SCANNER_V4_ENDPOINT"] = "http://clairv4:6060" def secscan_api(*args, **kwargs): @@ -104,11 +85,11 @@ def test_perform_indexing(next_token, expected_next_token, initialized_db): return api - def layer_analyzer(*args, **kwargs): - return Mock() - with patch("data.secscan_model.secscan_v4_model.ClairSecurityScannerAPI", secscan_api): - with patch("util.secscan.analyzer.LayerAnalyzer", layer_analyzer): - secscan_model.configure(app, instance_keys, storage) + secscan_model.configure(app, instance_keys, storage) + if expected_error is not None: + with pytest.raises(expected_error): + secscan_model.perform_indexing(next_token) + else: assert secscan_model.perform_indexing(next_token) == expected_next_token diff --git a/data/secscan_model/test/test_secscan_v2_model.py b/data/secscan_model/test/test_secscan_v2_model.py index c8d5376a0..a1cf13012 100644 --- a/data/secscan_model/test/test_secscan_v2_model.py +++ b/data/secscan_model/test/test_secscan_v2_model.py @@ -4,7 +4,7 @@ import pytest from data.secscan_model.datatypes import ScanLookupStatus, SecurityInformation from data.secscan_model.secscan_v2_model import V2SecurityScanner from data.registry_model import registry_model -from 
data.database import Manifest, Image +from data.database import Manifest, Image, ManifestSecurityStatus, IndexStatus, IndexerVersion from data.model.oci import shared from data.model.image import set_secscan_status @@ -15,8 +15,10 @@ from app import app, instance_keys, storage def test_load_security_information_unknown_manifest(initialized_db): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) + + registry_model.populate_legacy_images_for_testing(manifest, storage) # Delete the manifest. Manifest.get(id=manifest._db_id).delete_instance(recursive=True) @@ -30,8 +32,10 @@ def test_load_security_information_unknown_manifest(initialized_db): def test_load_security_information_failed_to_index(initialized_db): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) + + registry_model.populate_legacy_images_for_testing(manifest, storage) # Set the index status. image = shared.get_legacy_image_for_manifest(manifest._db_id) @@ -45,8 +49,10 @@ def test_load_security_information_failed_to_index(initialized_db): def test_load_security_information_queued(initialized_db): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) + + registry_model.populate_legacy_images_for_testing(manifest, storage) secscan = V2SecurityScanner(app, instance_keys, storage) assert secscan.load_security_information(manifest).status == ScanLookupStatus.NOT_YET_INDEXED @@ -87,11 +93,14 @@ def test_load_security_information_queued(initialized_db): ) def test_load_security_information_api_responses(secscan_api_response, initialized_db): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag( - tag, backfill_if_necessary=True, include_legacy_image=True - ) - set_secscan_status(Image.get(id=manifest.legacy_image._db_id), True, 3) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) + + registry_model.populate_legacy_images_for_testing(manifest, storage) + + legacy_image_row = shared.get_legacy_image_for_manifest(manifest._db_id) + assert legacy_image_row is not None + set_secscan_status(legacy_image_row, True, 3) secscan = V2SecurityScanner(app, instance_keys, storage) secscan._legacy_secscan_api = mock.Mock() @@ -110,3 +119,10 @@ def test_load_security_information_api_responses(secscan_api_response, initializ assert len(security_information.Layer.Features) == len( secscan_api_response["Layer"].get("Features", []) ) + + +def test_perform_indexing(initialized_db): + secscan = V2SecurityScanner(app, instance_keys, 
storage) + + with pytest.raises(NotImplementedError): + secscan.perform_indexing() diff --git a/data/secscan_model/test/test_secscan_v4_model.py b/data/secscan_model/test/test_secscan_v4_model.py index 91084113d..e2f09071c 100644 --- a/data/secscan_model/test/test_secscan_v4_model.py +++ b/data/secscan_model/test/test_secscan_v4_model.py @@ -33,8 +33,8 @@ def set_secscan_config(): def test_load_security_information_queued(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) secscan = V4SecurityScanner(app, instance_keys, storage) assert secscan.load_security_information(manifest).status == ScanLookupStatus.NOT_YET_INDEXED @@ -42,8 +42,8 @@ def test_load_security_information_queued(initialized_db, set_secscan_config): def test_load_security_information_failed_to_index(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) ManifestSecurityStatus.create( manifest=manifest._db_id, @@ -61,8 +61,8 @@ def test_load_security_information_failed_to_index(initialized_db, set_secscan_c def test_load_security_information_api_returns_none(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) ManifestSecurityStatus.create( manifest=manifest._db_id, @@ -83,8 +83,8 @@ def test_load_security_information_api_returns_none(initialized_db, set_secscan_ def test_load_security_information_api_request_failure(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) mss = ManifestSecurityStatus.create( manifest=manifest._db_id, @@ -106,8 +106,8 @@ def test_load_security_information_api_request_failure(initialized_db, set_secsc def test_load_security_information_success(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) ManifestSecurityStatus.create( manifest=manifest._db_id, @@ -140,11 +140,6 @@ def test_load_security_information_success(initialized_db, set_secscan_config): def test_perform_indexing_whitelist(initialized_db, 
set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.return_value = {"state": "abc"} @@ -155,38 +150,15 @@ def test_perform_indexing_whitelist(initialized_db, set_secscan_config): next_token = secscan.perform_indexing() - assert secscan._secscan_api.index.call_count == expected_manifests.count() - for mss in ManifestSecurityStatus.select(): - assert mss.repository.namespace_user.username == "devtable" - assert ManifestSecurityStatus.select().count() == expected_manifests.count() - assert ( - Manifest.get_by_id(next_token.min_id - 1).repository.namespace_user.username == "devtable" - ) - - -def test_perform_indexing_empty_whitelist(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = [] - secscan = V4SecurityScanner(app, instance_keys, storage) - secscan._secscan_api = mock.Mock() - secscan._secscan_api.state.return_value = {"state": "abc"} - secscan._secscan_api.index.return_value = ( - {"err": None, "state": IndexReportState.Index_Finished}, - "abc", - ) - - next_token = secscan.perform_indexing() - - assert secscan._secscan_api.index.call_count == 0 - assert ManifestSecurityStatus.select().count() == 0 assert next_token.min_id == Manifest.select(fn.Max(Manifest.id)).scalar() + 1 + assert secscan._secscan_api.index.call_count == Manifest.select().count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() + for mss in ManifestSecurityStatus.select(): + assert mss.index_status == IndexStatus.COMPLETED + def test_perform_indexing_failed(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.return_value = {"state": "abc"} @@ -195,7 +167,7 @@ def test_perform_indexing_failed(initialized_db, set_secscan_config): "abc", ) - for manifest in expected_manifests: + for manifest in Manifest.select(): ManifestSecurityStatus.create( manifest=manifest, repository=manifest.repository, @@ -210,16 +182,13 @@ def test_perform_indexing_failed(initialized_db, set_secscan_config): secscan.perform_indexing() - assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() for mss in ManifestSecurityStatus.select(): assert mss.index_status == IndexStatus.COMPLETED def test_perform_indexing_failed_within_reindex_threshold(initialized_db, set_secscan_config): app.config["SECURITY_SCANNER_V4_REINDEX_THRESHOLD"] = 300 - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() @@ -229,7 +198,7 @@ def test_perform_indexing_failed_within_reindex_threshold(initialized_db, set_se "abc", ) - for manifest in expected_manifests: + for manifest in Manifest.select(): ManifestSecurityStatus.create( manifest=manifest, repository=manifest.repository, @@ -242,17 +211,12 @@ def test_perform_indexing_failed_within_reindex_threshold(initialized_db, set_se secscan.perform_indexing() - 
assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() for mss in ManifestSecurityStatus.select(): assert mss.index_status == IndexStatus.FAILED def test_perform_indexing_needs_reindexing(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.return_value = {"state": "xyz"} @@ -261,7 +225,7 @@ def test_perform_indexing_needs_reindexing(initialized_db, set_secscan_config): "xyz", ) - for manifest in expected_manifests: + for manifest in Manifest.select(): ManifestSecurityStatus.create( manifest=manifest, repository=manifest.repository, @@ -276,7 +240,7 @@ def test_perform_indexing_needs_reindexing(initialized_db, set_secscan_config): secscan.perform_indexing() - assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() for mss in ManifestSecurityStatus.select(): assert mss.indexer_hash == "xyz" @@ -285,10 +249,6 @@ def test_perform_indexing_needs_reindexing_within_reindex_threshold( initialized_db, set_secscan_config ): app.config["SECURITY_SCANNER_V4_REINDEX_THRESHOLD"] = 300 - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() @@ -298,7 +258,7 @@ def test_perform_indexing_needs_reindexing_within_reindex_threshold( "xyz", ) - for manifest in expected_manifests: + for manifest in Manifest.select(): ManifestSecurityStatus.create( manifest=manifest, repository=manifest.repository, @@ -311,14 +271,12 @@ def test_perform_indexing_needs_reindexing_within_reindex_threshold( secscan.perform_indexing() - assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() for mss in ManifestSecurityStatus.select(): assert mss.indexer_hash == "abc" def test_perform_indexing_api_request_failure_state(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.side_effect = APIRequestFailure() @@ -330,14 +288,6 @@ def test_perform_indexing_api_request_failure_state(initialized_db, set_secscan_ def test_perform_indexing_api_request_failure_index(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select(fn.Max(Manifest.id)) - .join(Repository) - .join(User) - .where(User.username == "devtable") - ) - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.return_value = {"state": "abc"} @@ -357,8 +307,8 @@ def test_perform_indexing_api_request_failure_index(initialized_db, set_secscan_ next_token = secscan.perform_indexing() - assert next_token.min_id == expected_manifests.scalar() + 1 - assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert next_token.min_id == 
Manifest.select(fn.Max(Manifest.id)).scalar() + 1 + assert ManifestSecurityStatus.select().count() == Manifest.select(fn.Max(Manifest.id)).count() def test_features_for(): diff --git a/endpoints/api/image.py b/endpoints/api/image.py index dc13aab5f..41c6f7f2d 100644 --- a/endpoints/api/image.py +++ b/endpoints/api/image.py @@ -3,6 +3,10 @@ List and lookup repository images. """ import json +from collections import defaultdict +from datetime import datetime + +from app import storage from data.registry_model import registry_model from endpoints.api import ( resource, @@ -17,7 +21,7 @@ from endpoints.api import ( from endpoints.exception import NotFound -def image_dict(image, with_history=False, with_tags=False): +def image_dict(image): parsed_command = None if image.command: try: @@ -31,19 +35,11 @@ def image_dict(image, with_history=False, with_tags=False): "comment": image.comment, "command": parsed_command, "size": image.image_size, - "uploading": image.uploading, - "sort_index": len(image.parents), + "uploading": False, + "sort_index": 0, } - if with_tags: - image_data["tags"] = [tag.name for tag in image.tags] - - if with_history: - image_data["history"] = [image_dict(parent) for parent in image.parents] - - # Calculate the ancestors string, with the DBID's replaced with the docker IDs. - parent_docker_ids = [parent_image.docker_image_id for parent_image in image.parents] - image_data["ancestors"] = "/{0}/".format("/".join(parent_docker_ids)) + image_data["ancestors"] = "/{0}/".format("/".join(image.ancestor_ids)) return image_data @@ -66,8 +62,35 @@ class RepositoryImageList(RepositoryParamResource): if repo_ref is None: raise NotFound() - images = registry_model.get_legacy_images(repo_ref) - return {"images": [image_dict(image, with_tags=True) for image in images]} + tags = registry_model.list_all_active_repository_tags(repo_ref) + images_with_tags = defaultdict(list) + for tag in tags: + legacy_image_id = tag.manifest.legacy_image_root_id + if legacy_image_id is not None: + images_with_tags[legacy_image_id].append(tag) + + # NOTE: This is replicating our older response for this endpoint, but + # returns empty for the metadata fields. This is to ensure back-compat + # for callers still using the deprecated API, while not having to load + # all the manifests from storage. + return { + "images": [ + { + "id": image_id, + "created": format_date( + datetime.utcfromtimestamp((min([tag.lifetime_start_ts for tag in tags]))) + ), + "comment": "", + "command": "", + "size": 0, + "uploading": False, + "sort_index": 0, + "tags": [tag.name for tag in tags], + "ancestors": "", + } + for image_id, tags in images_with_tags.items() + ] + } @resource("/v1/repository//image/") @@ -90,8 +113,8 @@ class RepositoryImage(RepositoryParamResource): if repo_ref is None: raise NotFound() - image = registry_model.get_legacy_image(repo_ref, image_id, include_parents=True) + image = registry_model.get_legacy_image(repo_ref, image_id, storage) if image is None: raise NotFound() - return image_dict(image, with_history=True) + return image_dict(image) diff --git a/endpoints/api/manifest.py b/endpoints/api/manifest.py index 9baf8a7a7..502885c5b 100644 --- a/endpoints/api/manifest.py +++ b/endpoints/api/manifest.py @@ -4,6 +4,7 @@ Manage the manifests of a repository. 
import json import logging +from datetime import datetime from flask import request from app import label_validator, storage @@ -74,10 +75,6 @@ def _layer_dict(manifest_layer, index): def _manifest_dict(manifest): - image = None - if manifest.legacy_image_if_present is not None: - image = image_dict(manifest.legacy_image, with_history=True) - layers = None if not manifest.is_manifest_list: layers = registry_model.list_manifest_layers(manifest, storage) @@ -85,14 +82,30 @@ def _manifest_dict(manifest): logger.debug("Missing layers for manifest `%s`", manifest.digest) abort(404) + image = None + if manifest.legacy_image_root_id: + # NOTE: This is replicating our older response for this endpoint, but + # returns empty for the metadata fields. This is to ensure back-compat + # for callers still using the deprecated API. + image = { + "id": manifest.legacy_image_root_id, + "created": format_date(datetime.utcnow()), + "comment": "", + "command": "", + "size": 0, + "uploading": False, + "sort_index": 0, + "ancestors": "", + } + return { "digest": manifest.digest, "is_manifest_list": manifest.is_manifest_list, "manifest_data": manifest.internal_manifest_bytes.as_unicode(), - "image": image, "layers": ( [_layer_dict(lyr.layer_info, idx) for idx, lyr in enumerate(layers)] if layers else None ), + "image": image, } @@ -112,9 +125,7 @@ class RepositoryManifest(RepositoryParamResource): if repo_ref is None: raise NotFound() - manifest = registry_model.lookup_manifest_by_digest( - repo_ref, manifestref, include_legacy_image=True - ) + manifest = registry_model.lookup_manifest_by_digest(repo_ref, manifestref) if manifest is None: raise NotFound() diff --git a/endpoints/api/repository_models_pre_oci.py b/endpoints/api/repository_models_pre_oci.py index 45773c051..6cc05e0f7 100644 --- a/endpoints/api/repository_models_pre_oci.py +++ b/endpoints/api/repository_models_pre_oci.py @@ -161,7 +161,7 @@ class PreOCIModel(RepositoryDataInterface): repo.namespace_user.username, repo.name, repo.rid in star_set, - repo.visibility_id == model.repository.get_public_repo_visibility().id, + model.repository.is_repository_public(repo), repo_kind, repo.description, repo.namespace_user.organization, @@ -257,8 +257,8 @@ class PreOCIModel(RepositoryDataInterface): tags = [ Tag( tag.name, - tag.legacy_image.docker_image_id if tag.legacy_image_if_present else None, - tag.legacy_image.aggregate_size if tag.legacy_image_if_present else None, + tag.manifest.legacy_image_root_id, + tag.manifest_layers_size, tag.lifetime_start_ts, tag.manifest_digest, tag.lifetime_end_ts, diff --git a/endpoints/api/robot_models_pre_oci.py b/endpoints/api/robot_models_pre_oci.py index 54e9a4dea..4c010633e 100644 --- a/endpoints/api/robot_models_pre_oci.py +++ b/endpoints/api/robot_models_pre_oci.py @@ -25,7 +25,7 @@ class RobotPreOCIModel(RobotInterface): return [ Permission( permission.repository.name, - permission.repository.visibility.name, + model.repositoy.repository_visibility_name(permission.repository), permission.role.name, ) for permission in permissions diff --git a/endpoints/api/secscan.py b/endpoints/api/secscan.py index a5ec05943..c459c2cdd 100644 --- a/endpoints/api/secscan.py +++ b/endpoints/api/secscan.py @@ -7,6 +7,7 @@ import features from enum import Enum, unique +from app import storage from auth.decorators import process_basic_auth_no_pass from data.registry_model import registry_model from data.secscan_model import secscan_model @@ -101,7 +102,7 @@ class RepositoryImageSecurity(RepositoryParamResource): if repo_ref is None: 
raise NotFound() - legacy_image = registry_model.get_legacy_image(repo_ref, imageid) + legacy_image = registry_model.get_legacy_image(repo_ref, imageid, storage) if legacy_image is None: raise NotFound() diff --git a/endpoints/api/tag.py b/endpoints/api/tag.py index cb6da0f30..314ab6f4b 100644 --- a/endpoints/api/tag.py +++ b/endpoints/api/tag.py @@ -9,6 +9,7 @@ from auth.auth_context import get_authenticated_user from data.registry_model import registry_model from endpoints.api import ( resource, + deprecated, nickname, require_repo_read, require_repo_write, @@ -40,18 +41,11 @@ def _tag_dict(tag): if tag.lifetime_end_ts and tag.lifetime_end_ts > 0: tag_info["end_ts"] = tag.lifetime_end_ts - # TODO: Remove this once fully on OCI data model. - if tag.legacy_image_if_present: - tag_info["docker_image_id"] = tag.legacy_image.docker_image_id - tag_info["image_id"] = tag.legacy_image.docker_image_id - tag_info["size"] = tag.legacy_image.aggregate_size - - # TODO: Remove this check once fully on OCI data model. - if tag.manifest_digest: - tag_info["manifest_digest"] = tag.manifest_digest - - if tag.manifest: - tag_info["is_manifest_list"] = tag.manifest.is_manifest_list + tag_info["manifest_digest"] = tag.manifest_digest + tag_info["is_manifest_list"] = tag.manifest.is_manifest_list + tag_info["size"] = tag.manifest_layers_size + tag_info["docker_image_id"] = tag.manifest.legacy_image_root_id + tag_info["image_id"] = tag.manifest.legacy_image_root_id if tag.lifetime_start_ts and tag.lifetime_start_ts > 0: last_modified = format_date(datetime.utcfromtimestamp(tag.lifetime_start_ts)) @@ -188,7 +182,7 @@ class RepositoryTag(RepositoryParamResource): raise InvalidRequest("Could not update tag expiration; Tag has probably changed") if "image" in request.get_json() or "manifest_digest" in request.get_json(): - existing_tag = registry_model.get_repo_tag(repo_ref, tag, include_legacy_image=True) + existing_tag = registry_model.get_repo_tag(repo_ref, tag) manifest_or_image = None image_id = None @@ -201,7 +195,7 @@ class RepositoryTag(RepositoryParamResource): ) else: image_id = request.get_json()["image"] - manifest_or_image = registry_model.get_legacy_image(repo_ref, image_id) + manifest_or_image = registry_model.get_legacy_image(repo_ref, image_id, storage) if manifest_or_image is None: raise NotFound() @@ -272,6 +266,7 @@ class RepositoryTagImages(RepositoryParamResource): @nickname("listTagImages") @disallow_for_app_repositories @parse_args() + @deprecated() @query_param( "owned", "If specified, only images wholely owned by this tag are returned.", @@ -286,30 +281,42 @@ class RepositoryTagImages(RepositoryParamResource): if repo_ref is None: raise NotFound() - tag_ref = registry_model.get_repo_tag(repo_ref, tag, include_legacy_image=True) + tag_ref = registry_model.get_repo_tag(repo_ref, tag) if tag_ref is None: raise NotFound() - if tag_ref.legacy_image_if_present is None: + if parsed_args["owned"]: + # NOTE: This is deprecated, so we just return empty now. return {"images": []} - image_id = tag_ref.legacy_image.docker_image_id + manifest = registry_model.get_manifest_for_tag(tag_ref) + if manifest is None: + raise NotFound() - all_images = None - if parsed_args["owned"]: - # TODO: Remove the `owned` image concept once we are fully on V2_2. 
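As a rough illustration (values invented), a tag entry emitted by _tag_dict now always carries the manifest-derived fields rather than legacy-image metadata:

# Hypothetical example of the manifest-derived fields _tag_dict now always sets.
example_tag_info = {
    "manifest_digest": "sha256:e3b0c44298fc1c149afbf4c8996fb924...",  # tag.manifest_digest
    "is_manifest_list": False,             # tag.manifest.is_manifest_list
    "size": 123721,                        # tag.manifest_layers_size
    "docker_image_id": "f1d2d2f924e9...",  # tag.manifest.legacy_image_root_id
    "image_id": "f1d2d2f924e9...",         # same value, kept for API back-compat
    "end_ts": 1576269880,                  # only present if the tag has expired
}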
- all_images = registry_model.get_legacy_images_owned_by_tag(tag_ref) - else: - image_with_parents = registry_model.get_legacy_image( - repo_ref, image_id, include_parents=True - ) - if image_with_parents is None: - raise NotFound() - - all_images = [image_with_parents] + image_with_parents.parents + legacy_image = registry_model.get_legacy_image( + repo_ref, manifest.legacy_image_root_id, storage + ) + if legacy_image is None: + raise NotFound() + # NOTE: This is replicating our older response for this endpoint, but + # returns empty for the metadata fields. This is to ensure back-compat + # for callers still using the deprecated API, while not having to load + # all the manifests from storage. return { - "images": [image_dict(image) for image in all_images], + "images": [ + { + "id": image_id, + "created": format_date(datetime.utcfromtimestamp(tag_ref.lifetime_start_ts)), + "comment": "", + "command": "", + "size": 0, + "uploading": False, + "sort_index": 0, + "ancestors": "", + } + for image_id in legacy_image.full_image_id_chain + ] } @@ -374,7 +381,7 @@ class RestoreTag(RepositoryParamResource): repo_ref, manifest_digest, allow_dead=True, require_available=True ) elif image_id is not None: - manifest_or_legacy_image = registry_model.get_legacy_image(repo_ref, image_id) + manifest_or_legacy_image = registry_model.get_legacy_image(repo_ref, image_id, storage) if manifest_or_legacy_image is None: raise NotFound() diff --git a/endpoints/api/team.py b/endpoints/api/team.py index c2ca6e960..44167842c 100644 --- a/endpoints/api/team.py +++ b/endpoints/api/team.py @@ -49,7 +49,7 @@ def permission_view(permission): return { "repository": { "name": permission.repository.name, - "is_public": permission.repository.visibility.name == "public", + "is_public": model.repository.is_repository_public(permission.repository), }, "role": permission.role.name, } diff --git a/endpoints/api/test/test_deprecated_route.py b/endpoints/api/test/test_deprecated_route.py index c290ba5f1..361433021 100644 --- a/endpoints/api/test/test_deprecated_route.py +++ b/endpoints/api/test/test_deprecated_route.py @@ -11,16 +11,15 @@ from test.fixtures import * def test_deprecated_route(client): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) - image = shared.get_legacy_image_for_manifest(manifest._db_id) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) with client_with_identity("devtable", client) as cl: resp = conduct_api_call( cl, RepositoryImageSecurity, "get", - {"repository": "devtable/simple", "imageid": image.docker_image_id}, + {"repository": "devtable/simple", "imageid": manifest.legacy_image_root_id}, expected_code=200, ) diff --git a/endpoints/api/test/test_secscan.py b/endpoints/api/test/test_secscan.py index 561a17722..b88633399 100644 --- a/endpoints/api/test/test_secscan.py +++ b/endpoints/api/test/test_secscan.py @@ -13,12 +13,12 @@ from test.fixtures import * @pytest.mark.parametrize("endpoint", [RepositoryImageSecurity, RepositoryManifestSecurity,]) def test_get_security_info_with_pull_secret(endpoint, client): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) 
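A hedged sketch of how the deprecated listTagImages response is synthesized from the legacy image chain; full_image_id_chain is assumed to contain the root image ID plus its ancestors, as used in the handler above, and the helper name is invented.

def stub_images_for_chain(image_id_chain, created):
    # Back-compat placeholders: only the IDs and the tag's creation date are real.
    return [
        {
            "id": image_id,
            "created": created,
            "comment": "",
            "command": "",
            "size": 0,
            "uploading": False,
            "sort_index": 0,
            "ancestors": "",
        }
        for image_id in image_id_chain
    ]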
+ tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) params = { "repository": "devtable/simple", - "imageid": tag.legacy_image.docker_image_id, + "imageid": tag.manifest.legacy_image_root_id, "manifestref": manifest.digest, } diff --git a/endpoints/api/test/test_tag.py b/endpoints/api/test/test_tag.py index b03550665..7afc04d28 100644 --- a/endpoints/api/test/test_tag.py +++ b/endpoints/api/test/test_tag.py @@ -69,10 +69,10 @@ def test_move_tag(image_exists, test_tag, expected_status, client, app): test_image = "unknown" if image_exists: repo_ref = registry_model.lookup_repository("devtable", "simple") - tag_ref = registry_model.get_repo_tag(repo_ref, "latest", include_legacy_image=True) + tag_ref = registry_model.get_repo_tag(repo_ref, "latest") assert tag_ref - test_image = tag_ref.legacy_image.docker_image_id + test_image = tag_ref.manifest.legacy_image_root_id params = {"repository": "devtable/simple", "tag": test_tag} request_body = {"image": test_image} @@ -86,12 +86,12 @@ def test_move_tag(image_exists, test_tag, expected_status, client, app): @pytest.mark.parametrize( "repo_namespace, repo_name, query_count", [ - ("devtable", "simple", 5), - ("devtable", "history", 5), - ("devtable", "complex", 5), - ("devtable", "gargantuan", 5), - ("buynlarge", "orgrepo", 7), # +2 for permissions checks. - ("buynlarge", "anotherorgrepo", 7), # +2 for permissions checks. + ("devtable", "simple", 4), + ("devtable", "history", 4), + ("devtable", "complex", 4), + ("devtable", "gargantuan", 4), + ("buynlarge", "orgrepo", 6), # +2 for permissions checks. + ("buynlarge", "anotherorgrepo", 6), # +2 for permissions checks. ], ) def test_list_repo_tags(repo_namespace, repo_name, client, query_count, app): @@ -109,18 +109,15 @@ def test_list_repo_tags(repo_namespace, repo_name, client, query_count, app): @pytest.mark.parametrize( - "repository, tag, owned, expect_images", + "repository, tag, expect_images", [ - ("devtable/simple", "prod", False, True), - ("devtable/simple", "prod", True, False), - ("devtable/simple", "latest", False, True), - ("devtable/simple", "latest", True, False), - ("devtable/complex", "prod", False, True), - ("devtable/complex", "prod", True, True), + ("devtable/simple", "prod", True), + ("devtable/simple", "latest", True), + ("devtable/complex", "prod", True), ], ) -def test_list_tag_images(repository, tag, owned, expect_images, client, app): +def test_list_tag_images(repository, tag, expect_images, client, app): with client_with_identity("devtable", client) as cl: - params = {"repository": repository, "tag": tag, "owned": owned} + params = {"repository": repository, "tag": tag} result = conduct_api_call(cl, RepositoryTagImages, "get", params, None, 200).json assert bool(result["images"]) == expect_images diff --git a/endpoints/api/user.py b/endpoints/api/user.py index 064921109..8cb9e0853 100644 --- a/endpoints/api/user.py +++ b/endpoints/api/user.py @@ -1087,7 +1087,7 @@ class StarredRepositoryList(ApiResource): "namespace": repo_obj.namespace_user.username, "name": repo_obj.name, "description": repo_obj.description, - "is_public": repo_obj.visibility.name == "public", + "is_public": model.repository.is_repository_public(repo_obj), } return {"repositories": [repo_view(repo) for repo in repos]}, next_page_token diff --git a/endpoints/appr/models_cnr.py b/endpoints/appr/models_cnr.py index ff077f6e5..c49a4c768 100644 --- a/endpoints/appr/models_cnr.py +++ b/endpoints/appr/models_cnr.py @@ -10,6 +10,7 @@ import data.model 
from app import app, storage, authentication, model_cache from data import appr_model +from data import model as data_model from data.cache import cache_key from data.database import Repository, MediaType, db_transaction from data.appr_model.models import NEW_MODELS @@ -173,7 +174,7 @@ class CNRAppModel(AppRegistryDataInterface): view = ApplicationSummaryView( namespace=repo.namespace_user.username, name=app_name, - visibility=repo.visibility.name, + visibility=data_model.repository.repository_visibility_name(repo), default=available_releases[0], channels=channels, manifests=manifests, diff --git a/endpoints/secscan.py b/endpoints/secscan.py index 395949b89..057b6d82c 100644 --- a/endpoints/secscan.py +++ b/endpoints/secscan.py @@ -1,33 +1,12 @@ import logging -import json -import features - -from app import secscan_notification_queue -from flask import request, make_response, Blueprint, abort -from endpoints.decorators import route_show_if, anon_allowed +from flask import make_response, Blueprint +from endpoints.decorators import anon_allowed logger = logging.getLogger(__name__) secscan = Blueprint("secscan", __name__) -@route_show_if(features.SECURITY_SCANNER) -@secscan.route("/notify", methods=["POST"]) -def secscan_notification(): - data = request.get_json() - logger.debug("Got notification from Security Scanner: %s", data) - if "Notification" not in data: - abort(400) - - notification = data["Notification"] - name = ["named", notification["Name"]] - - if not secscan_notification_queue.alive(name): - secscan_notification_queue.put(name, json.dumps(notification)) - - return make_response("Okay") - - @secscan.route("/_internal_ping") @anon_allowed def internal_ping(): diff --git a/endpoints/test/test_anon_checked.py b/endpoints/test/test_anon_checked.py index 7595a2b92..95c51085c 100644 --- a/endpoints/test/test_anon_checked.py +++ b/endpoints/test/test_anon_checked.py @@ -3,10 +3,9 @@ import pytest from app import app from endpoints.v1 import v1_bp from endpoints.v2 import v2_bp -from endpoints.verbs import verbs -@pytest.mark.parametrize("blueprint", [v2_bp, v1_bp, verbs,]) +@pytest.mark.parametrize("blueprint", [v2_bp, v1_bp,]) def test_verify_blueprint(blueprint): class Checker(object): def __init__(self): diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index ae94642d5..e38ecc9df 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -40,18 +40,7 @@ def require_completion(f): @wraps(f) def wrapper(namespace, repository, *args, **kwargs): - image_id = kwargs["image_id"] - repository_ref = registry_model.lookup_repository(namespace, repository) - if repository_ref is not None: - legacy_image = registry_model.get_legacy_image(repository_ref, image_id) - if legacy_image is not None and legacy_image.uploading: - abort( - 400, - "Image %(image_id)s is being uploaded, retry later", - issue="upload-in-progress", - image_id=image_id, - ) - + # TODO: Remove this return f(namespace, repository, *args, **kwargs) return wrapper @@ -102,7 +91,9 @@ def head_image_layer(namespace, repository, image_id, headers): abort(404) logger.debug("Looking up placement locations") - legacy_image = registry_model.get_legacy_image(repository_ref, image_id, include_blob=True) + legacy_image = registry_model.get_legacy_image( + repository_ref, image_id, store, include_blob=True + ) if legacy_image is None: logger.debug("Could not find any blob placement locations") abort(404, "Image %(image_id)s not found", issue="unknown-image", image_id=image_id) @@ -139,7 +130,9 @@ def 
get_image_layer(namespace, repository, image_id, headers): if repository_ref is None: abort(404) - legacy_image = registry_model.get_legacy_image(repository_ref, image_id, include_blob=True) + legacy_image = registry_model.get_legacy_image( + repository_ref, image_id, store, include_blob=True + ) if legacy_image is None: abort(404, "Image %(image_id)s not found", issue="unknown-image", image_id=image_id) @@ -351,7 +344,9 @@ def get_image_json(namespace, repository, image_id, headers): abort(403) logger.debug("Looking up repo image") - legacy_image = registry_model.get_legacy_image(repository_ref, image_id, include_blob=True) + legacy_image = registry_model.get_legacy_image( + repository_ref, image_id, store, include_blob=True + ) if legacy_image is None: flask_abort(404) @@ -381,15 +376,12 @@ def get_image_ancestry(namespace, repository, image_id, headers): abort(403) logger.debug("Looking up repo image") - legacy_image = registry_model.get_legacy_image(repository_ref, image_id, include_parents=True) + legacy_image = registry_model.get_legacy_image(repository_ref, image_id, store) if legacy_image is None: abort(404, "Image %(image_id)s not found", issue="unknown-image", image_id=image_id) # NOTE: We can not use jsonify here because we are returning a list not an object. - ancestor_ids = [legacy_image.docker_image_id] + [ - a.docker_image_id for a in legacy_image.parents - ] - response = make_response(json.dumps(ancestor_ids), 200) + response = make_response(json.dumps(legacy_image.full_image_id_chain), 200) response.headers.extend(headers) return response diff --git a/endpoints/v1/tag.py b/endpoints/v1/tag.py index d7cf3f5c6..c2900fb41 100644 --- a/endpoints/v1/tag.py +++ b/endpoints/v1/tag.py @@ -98,7 +98,7 @@ def put_tag(namespace_name, repo_name, tag): # Check if there is an existing image we should use (for PUT calls outside of a normal push # operation). - legacy_image = registry_model.get_legacy_image(repository_ref, image_id) + legacy_image = registry_model.get_legacy_image(repository_ref, image_id, storage) if legacy_image is None: abort(400) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 8428a6297..d1f2c987f 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -68,7 +68,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): image_pulls.labels("v2", "tag", 404).inc() raise ManifestUnknown() - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + manifest = registry_model.get_manifest_for_tag(tag) if manifest is None: # Something went wrong. image_pulls.labels("v2", "tag", 400).inc() diff --git a/endpoints/v2/test/test_blob.py b/endpoints/v2/test/test_blob.py index 3d71a407f..d87921b00 100644 --- a/endpoints/v2/test/test_blob.py +++ b/endpoints/v2/test/test_blob.py @@ -129,12 +129,13 @@ def test_blob_mounting(mount_digest, source_repo, username, expect_success, clie headers=headers, ) + repository = model.repository.get_repository("devtable", "building") + if expect_success: # Ensure the blob now exists under the repo. 
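To illustrate the V1 ancestry response assembled in get_image_ancestry above: the body is a bare JSON array of docker image IDs (the IDs below are made up), which is why the handler serializes it with json.dumps rather than jsonify.

import json

chain = ["f1d2d2f924e9", "a94a8fe5ccb1", "de9f2c7fd25e"]  # legacy_image.full_image_id_chain
body = json.dumps(chain)
# body == '["f1d2d2f924e9", "a94a8fe5ccb1", "de9f2c7fd25e"]'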
- model.blob.get_repo_blob_by_digest("devtable", "building", mount_digest) + assert model.oci.blob.get_repository_blob_by_digest(repository, mount_digest) else: - with pytest.raises(model.blob.BlobDoesNotExist): - model.blob.get_repo_blob_by_digest("devtable", "building", mount_digest) + assert model.oci.blob.get_repository_blob_by_digest(repository, mount_digest) is None def test_blob_upload_offset(client, app): diff --git a/endpoints/v2/test/test_manifest_cornercases.py b/endpoints/v2/test/test_manifest_cornercases.py index 0037292cf..38d61e1d4 100644 --- a/endpoints/v2/test/test_manifest_cornercases.py +++ b/endpoints/v2/test/test_manifest_cornercases.py @@ -31,6 +31,23 @@ def _perform_cleanup(): model.gc.garbage_collect_repo(repo_object) +def _get_legacy_image_row_id(tag): + return ( + database.ManifestLegacyImage.select(database.ManifestLegacyImage, database.Image) + .join(database.Image) + .where(database.ManifestLegacyImage.manifest == tag.manifest._db_id) + .get() + .image.docker_image_id + ) + + +def _add_legacy_image(namespace, repo_name, tag_name): + repo_ref = registry_model.lookup_repository(namespace, repo_name) + tag_ref = registry_model.get_repo_tag(repo_ref, tag_name) + manifest_ref = registry_model.get_manifest_for_tag(tag_ref) + registry_model.populate_legacy_images_for_testing(manifest_ref, storage) + + def test_missing_link(initialized_db): """ Tests for a corner case that could result in missing a link to a blob referenced by a manifest. @@ -54,6 +71,8 @@ def test_missing_link(initialized_db): that of `SECOND_ID`, leaving `THIRD_ID` unlinked and therefore, after a GC, missing `FOURTH_BLOB`. """ + # TODO: Remove this test once we stop writing legacy image rows. + with set_tag_expiration_policy("devtable", 0): location_name = storage.preferred_locations[0] location = database.ImageStorageLocation.get(name=location_name) @@ -72,21 +91,19 @@ def test_missing_link(initialized_db): ) _write_manifest(ADMIN_ACCESS_USER, REPO, FIRST_TAG, first_manifest) + _add_legacy_image(ADMIN_ACCESS_USER, REPO, FIRST_TAG) # Delete all temp tags and perform GC. _perform_cleanup() # Ensure that the first blob still exists, along with the first tag. - assert ( - model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, first_blob_sha) is not None - ) + repo = model.repository.get_repository(ADMIN_ACCESS_USER, REPO) + assert model.oci.blob.get_repository_blob_by_digest(repo, first_blob_sha) is not None repository_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, REPO) - found_tag = registry_model.get_repo_tag( - repository_ref, FIRST_TAG, include_legacy_image=True - ) + found_tag = registry_model.get_repo_tag(repository_ref, FIRST_TAG) assert found_tag is not None - assert found_tag.legacy_image.docker_image_id == "first" + assert _get_legacy_image_row_id(found_tag) == "first" # Create the second and third blobs. second_blob_sha = "sha256:" + hashlib.sha256(b"SECOND").hexdigest() @@ -108,6 +125,7 @@ def test_missing_link(initialized_db): ) _write_manifest(ADMIN_ACCESS_USER, REPO, SECOND_TAG, second_manifest) + _add_legacy_image(ADMIN_ACCESS_USER, REPO, SECOND_TAG) # Delete all temp tags and perform GC. 
_perform_cleanup() @@ -117,18 +135,14 @@ def test_missing_link(initialized_db): assert registry_model.get_repo_blob_by_digest(repository_ref, second_blob_sha) is not None assert registry_model.get_repo_blob_by_digest(repository_ref, third_blob_sha) is not None - found_tag = registry_model.get_repo_tag( - repository_ref, FIRST_TAG, include_legacy_image=True - ) + found_tag = registry_model.get_repo_tag(repository_ref, FIRST_TAG) assert found_tag is not None - assert found_tag.legacy_image.docker_image_id == "first" + assert _get_legacy_image_row_id(found_tag) == "first" # Ensure the IDs have changed. - found_tag = registry_model.get_repo_tag( - repository_ref, SECOND_TAG, include_legacy_image=True - ) + found_tag = registry_model.get_repo_tag(repository_ref, SECOND_TAG) assert found_tag is not None - assert found_tag.legacy_image.docker_image_id != "second" + assert _get_legacy_image_row_id(found_tag) != "second" # Create the fourth blob. fourth_blob_sha = "sha256:" + hashlib.sha256(b"FOURTH").hexdigest() @@ -147,6 +161,7 @@ def test_missing_link(initialized_db): ) _write_manifest(ADMIN_ACCESS_USER, REPO, THIRD_TAG, third_manifest) + _add_legacy_image(ADMIN_ACCESS_USER, REPO, THIRD_TAG) # Delete all temp tags and perform GC. _perform_cleanup() @@ -158,10 +173,6 @@ def test_missing_link(initialized_db): assert registry_model.get_repo_blob_by_digest(repository_ref, fourth_blob_sha) is not None # Ensure new synthesized IDs were created. - second_tag = registry_model.get_repo_tag( - repository_ref, SECOND_TAG, include_legacy_image=True - ) - third_tag = registry_model.get_repo_tag( - repository_ref, THIRD_TAG, include_legacy_image=True - ) - assert second_tag.legacy_image.docker_image_id != third_tag.legacy_image.docker_image_id + second_tag = registry_model.get_repo_tag(repository_ref, SECOND_TAG) + third_tag = registry_model.get_repo_tag(repository_ref, THIRD_TAG) + assert _get_legacy_image_row_id(second_tag) != _get_legacy_image_row_id(third_tag) diff --git a/endpoints/verbs/__init__.py b/endpoints/verbs/__init__.py deleted file mode 100644 index e2bb25f7c..000000000 --- a/endpoints/verbs/__init__.py +++ /dev/null @@ -1,535 +0,0 @@ -import hashlib -import json -import logging -import uuid - -from functools import wraps - -from flask import redirect, Blueprint, abort, send_file, make_response, request -from prometheus_client import Counter - -import features - -from app import app, signer, storage, config_provider, ip_resolver, instance_keys -from auth.auth_context import get_authenticated_user -from auth.decorators import process_auth -from auth.permissions import ReadRepositoryPermission -from data import database -from data import model -from data.registry_model import registry_model -from endpoints.decorators import ( - anon_protect, - anon_allowed, - route_show_if, - parse_repository_name, - check_region_blacklisted, -) -from endpoints.metrics import image_pulls, image_pulled_bytes -from endpoints.v2.blob import BLOB_DIGEST_ROUTE -from image.appc import AppCImageFormatter -from image.shared import ManifestException -from image.docker.squashed import SquashedDockerImageFormatter -from storage import Storage -from util.audit import track_and_log, wrap_repository -from util.http import exact_abort -from util.metrics.prometheus import timed_blueprint -from util.registry.filelike import wrap_with_handler -from util.registry.queuefile import QueueFile -from util.registry.queueprocess import QueueProcess -from util.registry.tarlayerformat import TarLayerFormatterReporter - - -logger = 
logging.getLogger(__name__) -verbs = timed_blueprint(Blueprint("verbs", __name__)) - - -verb_stream_passes = Counter( - "quay_verb_stream_passes_total", - "number of passes over a tar stream used by verb requests", - labelnames=["kind"], -) - - -LAYER_MIMETYPE = "binary/octet-stream" -QUEUE_FILE_TIMEOUT = 15 # seconds - - -class VerbReporter(TarLayerFormatterReporter): - def __init__(self, kind): - self.kind = kind - - def report_pass(self, pass_count): - if pass_count: - verb_stream_passes.labels(self.kind).inc(pass_count) - - -def _open_stream(formatter, tag, schema1_manifest, derived_image_id, handlers, reporter): - """ - This method generates a stream of data which will be replicated and read from the queue files. - - This method runs in a separate process. - """ - # For performance reasons, we load the full image list here, cache it, then disconnect from - # the database. - with database.UseThenDisconnect(app.config): - layers = registry_model.list_parsed_manifest_layers( - tag.repository, schema1_manifest, storage, include_placements=True - ) - - def image_stream_getter(store, blob): - def get_stream_for_storage(): - current_image_stream = store.stream_read_file(blob.placements, blob.storage_path) - logger.debug("Returning blob %s: %s", blob.digest, blob.storage_path) - return current_image_stream - - return get_stream_for_storage - - def tar_stream_getter_iterator(): - # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3) - store = Storage(app, config_provider=config_provider, ip_resolver=ip_resolver) - - # Note: We reverse because we have to start at the leaf layer and move upward, - # as per the spec for the formatters. - for layer in reversed(layers): - yield image_stream_getter(store, layer.blob) - - stream = formatter.build_stream( - tag, - schema1_manifest, - derived_image_id, - layers, - tar_stream_getter_iterator, - reporter=reporter, - ) - - for handler_fn in handlers: - stream = wrap_with_handler(stream, handler_fn) - - return stream.read - - -def _sign_derived_image(verb, derived_image, queue_file): - """ - Read from the queue file and sign the contents which are generated. - - This method runs in a separate process. - """ - signature = None - try: - signature = signer.detached_sign(queue_file) - except Exception as e: - logger.exception( - "Exception when signing %s deriving image %s: $s", verb, derived_image, str(e) - ) - return - - # Setup the database (since this is a new process) and then disconnect immediately - # once the operation completes. - if not queue_file.raised_exception: - with database.UseThenDisconnect(app.config): - registry_model.set_derived_image_signature(derived_image, signer.name, signature) - - -def _write_derived_image_to_storage( - verb, derived_image, queue_file, namespace, repository, tag_name -): - """ - Read from the generated stream and write it back to the storage engine. - - This method runs in a separate process. - """ - - def handle_exception(ex): - logger.debug( - "Exception when building %s derived image %s (%s/%s:%s): %s", - verb, - derived_image, - namespace, - repository, - tag_name, - ex, - ) - - with database.UseThenDisconnect(app.config): - registry_model.delete_derived_image(derived_image) - - queue_file.add_exception_handler(handle_exception) - - # Re-Initialize the storage engine because some may not respond well to forking (e.g. 
S3) - store = Storage(app, config_provider=config_provider, ip_resolver=ip_resolver) - - try: - store.stream_write( - derived_image.blob.placements, derived_image.blob.storage_path, queue_file - ) - except IOError as ex: - logger.error( - "Exception when writing %s derived image %s (%s/%s:%s): %s", - verb, - derived_image, - namespace, - repository, - tag_name, - ex, - ) - - with database.UseThenDisconnect(app.config): - registry_model.delete_derived_image(derived_image) - - queue_file.close() - - -def _verify_repo_verb(_, namespace, repo_name, tag_name, verb, checker=None): - permission = ReadRepositoryPermission(namespace, repo_name) - repo = model.repository.get_repository(namespace, repo_name) - repo_is_public = repo is not None and model.repository.is_repository_public(repo) - if not permission.can() and not repo_is_public: - logger.debug( - "No permission to read repository %s/%s for user %s with verb %s", - namespace, - repo_name, - get_authenticated_user(), - verb, - ) - abort(403) - - if repo is not None and repo.kind.name != "image": - logger.debug( - "Repository %s/%s for user %s is not an image repo", - namespace, - repo_name, - get_authenticated_user(), - ) - abort(405) - - # Make sure the repo's namespace isn't disabled. - if not registry_model.is_namespace_enabled(namespace): - abort(400) - - # Lookup the requested tag. - repo_ref = registry_model.lookup_repository(namespace, repo_name) - if repo_ref is None: - abort(404) - - tag = registry_model.get_repo_tag(repo_ref, tag_name) - if tag is None: - logger.debug( - "Tag %s does not exist in repository %s/%s for user %s", - tag, - namespace, - repo_name, - get_authenticated_user(), - ) - abort(404) - - # Get its associated manifest. - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) - if manifest is None: - logger.debug("Could not get manifest on %s/%s:%s::%s", namespace, repo_name, tag.name, verb) - abort(404) - - # Retrieve the schema1-compatible version of the manifest. - try: - schema1_manifest = registry_model.get_schema1_parsed_manifest( - manifest, namespace, repo_name, tag.name, storage - ) - except ManifestException: - logger.exception( - "Could not get manifest on %s/%s:%s::%s", namespace, repo_name, tag.name, verb - ) - abort(400) - - if schema1_manifest is None: - abort(404) - - # If there is a data checker, call it first. - if checker is not None: - if not checker(tag, schema1_manifest): - logger.debug( - "Check mismatch on %s/%s:%s, verb %s", namespace, repo_name, tag.name, verb - ) - abort(404) - - # Preload the tag's repository information, so it gets cached. - assert tag.repository.namespace_name - assert tag.repository.name - - return tag, manifest, schema1_manifest - - -def _repo_verb_signature(namespace, repository, tag_name, verb, checker=None, **kwargs): - # Verify that the tag exists and that we have access to it. - tag, manifest, _ = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker) - - # Find the derived image storage for the verb. - derived_image = registry_model.lookup_derived_image( - manifest, verb, storage, varying_metadata={"tag": tag.name} - ) - - if derived_image is None or derived_image.blob.uploading: - return make_response("", 202) - - # Check if we have a valid signer configured. - if not signer.name: - abort(404) - - # Lookup the signature for the verb. - signature_value = registry_model.get_derived_image_signature(derived_image, signer.name) - if signature_value is None: - abort(404) - - # Return the signature. 
- return make_response(signature_value) - - -class SimpleHasher(object): - def __init__(self): - self._current_offset = 0 - - def update(self, buf): - self._current_offset += len(buf) - - @property - def hashed_bytes(self): - return self._current_offset - - -@check_region_blacklisted() -def _repo_verb( - namespace, repository, tag_name, verb, formatter, sign=False, checker=None, **kwargs -): - # Verify that the image exists and that we have access to it. - logger.debug( - "Verifying repo verb %s for repository %s/%s with user %s with mimetype %s", - verb, - namespace, - repository, - get_authenticated_user(), - request.accept_mimetypes.best, - ) - tag, manifest, schema1_manifest = _verify_repo_verb( - storage, namespace, repository, tag_name, verb, checker - ) - - # Load the repository for later. - repo = model.repository.get_repository(namespace, repository) - if repo is None: - abort(404) - - # Check for torrent, which is no longer supported. - if request.accept_mimetypes.best == "application/x-bittorrent": - abort(406) - - # Log the action. - track_and_log("repo_verb", wrap_repository(repo), tag=tag.name, verb=verb, **kwargs) - - is_readonly = app.config.get("REGISTRY_STATE", "normal") == "readonly" - - # Lookup/create the derived image for the verb and repo image. - if is_readonly: - derived_image = registry_model.lookup_derived_image( - manifest, verb, storage, varying_metadata={"tag": tag.name}, include_placements=True - ) - else: - derived_image = registry_model.lookup_or_create_derived_image( - manifest, - verb, - storage.preferred_locations[0], - storage, - varying_metadata={"tag": tag.name}, - include_placements=True, - ) - if derived_image is None: - logger.error("Could not create or lookup a derived image for manifest %s", manifest) - abort(400) - - if derived_image is not None and not derived_image.blob.uploading: - logger.debug("Derived %s image %s exists in storage", verb, derived_image) - is_head_request = request.method == "HEAD" - - if derived_image.blob.compressed_size: - image_pulled_bytes.labels("verbs").inc(derived_image.blob.compressed_size) - - download_url = storage.get_direct_download_url( - derived_image.blob.placements, derived_image.blob.storage_path, head=is_head_request - ) - if download_url: - logger.debug("Redirecting to download URL for derived %s image %s", verb, derived_image) - return redirect(download_url) - - # Close the database handle here for this process before we send the long download. - database.close_db_filter(None) - - logger.debug("Sending cached derived %s image %s", verb, derived_image) - return send_file( - storage.stream_read_file( - derived_image.blob.placements, derived_image.blob.storage_path - ), - mimetype=LAYER_MIMETYPE, - ) - - logger.debug("Building and returning derived %s image", verb) - hasher = SimpleHasher() - - # Close the database connection before any process forking occurs. This is important because - # the Postgres driver does not react kindly to forking, so we need to make sure it is closed - # so that each process will get its own unique connection. - database.close_db_filter(None) - - def _cleanup(): - # Close any existing DB connection once the process has exited. - database.close_db_filter(None) - - def _store_metadata_and_cleanup(): - if is_readonly: - return - - with database.UseThenDisconnect(app.config): - registry_model.set_derived_image_size(derived_image, hasher.hashed_bytes) - - # Create a queue process to generate the data. 
The queue files will read from the process - # and send the results to the client and storage. - unique_id = ( - derived_image.unique_id - if derived_image is not None - else hashlib.sha256(("%s:%s" % (verb, uuid.uuid4())).encode("utf-8")).hexdigest() - ) - handlers = [hasher.update] - reporter = VerbReporter(verb) - args = (formatter, tag, schema1_manifest, unique_id, handlers, reporter) - queue_process = QueueProcess( - _open_stream, - 8 * 1024, - 10 * 1024 * 1024, # 8K/10M chunk/max - args, - finished=_store_metadata_and_cleanup, - ) - - client_queue_file = QueueFile( - queue_process.create_queue(), "client", timeout=QUEUE_FILE_TIMEOUT - ) - - if not is_readonly: - storage_queue_file = QueueFile( - queue_process.create_queue(), "storage", timeout=QUEUE_FILE_TIMEOUT - ) - - # If signing is required, add a QueueFile for signing the image as we stream it out. - signing_queue_file = None - if sign and signer.name: - signing_queue_file = QueueFile( - queue_process.create_queue(), "signing", timeout=QUEUE_FILE_TIMEOUT - ) - - # Start building. - queue_process.run() - - # Start the storage saving. - if not is_readonly: - storage_args = (verb, derived_image, storage_queue_file, namespace, repository, tag_name) - QueueProcess.run_process(_write_derived_image_to_storage, storage_args, finished=_cleanup) - - if sign and signer.name: - signing_args = (verb, derived_image, signing_queue_file) - QueueProcess.run_process(_sign_derived_image, signing_args, finished=_cleanup) - - # Close the database handle here for this process before we send the long download. - database.close_db_filter(None) - - # Return the client's data. - return send_file(client_queue_file, mimetype=LAYER_MIMETYPE) - - -def os_arch_checker(os, arch): - def checker(tag, manifest): - try: - image_json = json.loads(manifest.leaf_layer.raw_v1_metadata) - except ValueError: - logger.exception("Could not parse leaf layer JSON for manifest %s", manifest) - return False - except TypeError: - logger.exception("Could not parse leaf layer JSON for manifest %s", manifest) - return False - - # Verify the architecture and os. - operating_system = image_json.get("os", "linux") - if operating_system != os: - return False - - architecture = image_json.get("architecture", "amd64") - - # Note: Some older Docker images have 'x86_64' rather than 'amd64'. - # We allow the conversion here. - if architecture == "x86_64" and operating_system == "linux": - architecture = "amd64" - - if architecture != arch: - return False - - return True - - return checker - - -def observe_route(protocol): - """ - Decorates verb endpoints to record the image_pulls metric into Prometheus. 
- """ - - def decorator(func): - @wraps(func) - def wrapper(*args, **kwargs): - rv = func(*args, **kwargs) - image_pulls.labels(protocol, "tag", rv.status_code) - return rv - - return wrapper - - return decorator - - -@route_show_if(features.ACI_CONVERSION) -@anon_protect -@verbs.route("/aci/////sig///", methods=["GET"]) -@verbs.route("/aci/////aci.asc///", methods=["GET"]) -@observe_route("aci") -@process_auth -def get_aci_signature(server, namespace, repository, tag, os, arch): - return _repo_verb_signature( - namespace, repository, tag, "aci", checker=os_arch_checker(os, arch), os=os, arch=arch - ) - - -@route_show_if(features.ACI_CONVERSION) -@anon_protect -@verbs.route( - "/aci/////aci///", methods=["GET", "HEAD"] -) -@observe_route("aci") -@process_auth -def get_aci_image(server, namespace, repository, tag, os, arch): - return _repo_verb( - namespace, - repository, - tag, - "aci", - AppCImageFormatter(), - sign=True, - checker=os_arch_checker(os, arch), - os=os, - arch=arch, - ) - - -@anon_protect -@verbs.route("/squash///", methods=["GET"]) -@observe_route("squash") -@process_auth -def get_squashed_tag(namespace, repository, tag): - return _repo_verb(namespace, repository, tag, "squash", SquashedDockerImageFormatter()) - - -@verbs.route("/_internal_ping") -@anon_allowed -def internal_ping(): - return make_response("true", 200) diff --git a/endpoints/verbs/test/test_security.py b/endpoints/verbs/test/test_security.py deleted file mode 100644 index 5ed065b1d..000000000 --- a/endpoints/verbs/test/test_security.py +++ /dev/null @@ -1,97 +0,0 @@ -import pytest - -from flask import url_for -from endpoints.test.shared import conduct_call, gen_basic_auth -from test.fixtures import * - -NO_ACCESS_USER = "freshuser" -READ_ACCESS_USER = "reader" -ADMIN_ACCESS_USER = "devtable" -CREATOR_ACCESS_USER = "creator" - -PUBLIC_REPO = "public/publicrepo" -PRIVATE_REPO = "devtable/shared" -ORG_REPO = "buynlarge/orgrepo" -ANOTHER_ORG_REPO = "buynlarge/anotherorgrepo" - -ACI_ARGS = { - "server": "someserver", - "tag": "fake", - "os": "linux", - "arch": "x64", -} - - -@pytest.mark.parametrize( - "user", - [ - (0, None), - (1, NO_ACCESS_USER), - (2, READ_ACCESS_USER), - (3, CREATOR_ACCESS_USER), - (4, ADMIN_ACCESS_USER), - ], -) -@pytest.mark.parametrize( - "endpoint,method,repository,single_repo_path,params,expected_statuses", - [ - ("get_aci_signature", "GET", PUBLIC_REPO, False, ACI_ARGS, (404, 404, 404, 404, 404)), - ("get_aci_signature", "GET", PRIVATE_REPO, False, ACI_ARGS, (403, 403, 404, 403, 404)), - ("get_aci_signature", "GET", ORG_REPO, False, ACI_ARGS, (403, 403, 404, 403, 404)), - ("get_aci_signature", "GET", ANOTHER_ORG_REPO, False, ACI_ARGS, (403, 403, 403, 403, 404)), - # get_aci_image - ("get_aci_image", "GET", PUBLIC_REPO, False, ACI_ARGS, (404, 404, 404, 404, 404)), - ("get_aci_image", "GET", PRIVATE_REPO, False, ACI_ARGS, (403, 403, 404, 403, 404)), - ("get_aci_image", "GET", ORG_REPO, False, ACI_ARGS, (403, 403, 404, 403, 404)), - ("get_aci_image", "GET", ANOTHER_ORG_REPO, False, ACI_ARGS, (403, 403, 403, 403, 404)), - # get_squashed_tag - ( - "get_squashed_tag", - "GET", - PUBLIC_REPO, - False, - dict(tag="fake"), - (404, 404, 404, 404, 404), - ), - ( - "get_squashed_tag", - "GET", - PRIVATE_REPO, - False, - dict(tag="fake"), - (403, 403, 404, 403, 404), - ), - ("get_squashed_tag", "GET", ORG_REPO, False, dict(tag="fake"), (403, 403, 404, 403, 404)), - ( - "get_squashed_tag", - "GET", - ANOTHER_ORG_REPO, - False, - dict(tag="fake"), - (403, 403, 403, 403, 404), - ), - ], -) -def 
test_verbs_security( - user, endpoint, method, repository, single_repo_path, params, expected_statuses, app, client -): - headers = {} - if user[1] is not None: - headers["Authorization"] = gen_basic_auth(user[1], "password") - - if single_repo_path: - params["repository"] = repository - else: - (namespace, repo_name) = repository.split("/") - params["namespace"] = namespace - params["repository"] = repo_name - - conduct_call( - client, - "verbs." + endpoint, - url_for, - method, - params, - expected_code=expected_statuses[user[0]], - headers=headers, - ) diff --git a/endpoints/web.py b/endpoints/web.py index 48072b436..de0738161 100644 --- a/endpoints/web.py +++ b/endpoints/web.py @@ -27,7 +27,6 @@ from app import ( billing as stripe, build_logs, avatar, - signer, log_archive, config_provider, get_app_url, @@ -144,17 +143,6 @@ def user_view(path): return index("") -@route_show_if(features.ACI_CONVERSION) -@web.route("/aci-signing-key") -@no_cache -@anon_protect -def aci_signing_key(): - if not signer.name: - abort(404) - - return send_file(signer.open_public_key_file(), mimetype=PGP_KEY_MIMETYPE) - - @web.route("/plans/") @no_cache @route_show_if(features.BILLING) diff --git a/health/services.py b/health/services.py index 7d831a3c4..0d4c5ea68 100644 --- a/health/services.py +++ b/health/services.py @@ -178,7 +178,6 @@ def _check_disk_space(for_warning): _INSTANCE_SERVICES = { "registry_gunicorn": _check_gunicorn("v1/_internal_ping"), "web_gunicorn": _check_gunicorn("_internal_ping"), - "verbs_gunicorn": _check_gunicorn("c1/_internal_ping"), "service_key": _check_service_key, "disk_space": _check_disk_space(for_warning=False), "jwtproxy": _check_jwt_proxy, diff --git a/image/appc/__init__.py b/image/appc/__init__.py deleted file mode 100644 index 60c74a415..000000000 --- a/image/appc/__init__.py +++ /dev/null @@ -1,227 +0,0 @@ -import json -import re -import calendar - -from uuid import uuid4 - -from app import app -from util.registry.streamlayerformat import StreamLayerMerger -from util.dict_wrappers import JSONPathDict -from image.common import TarImageFormatter - - -ACNAME_REGEX = re.compile(r"[^a-z-]+") - - -class AppCImageFormatter(TarImageFormatter): - """ - Image formatter which produces an tarball according to the AppC specification. - """ - - def stream_generator( - self, - tag, - parsed_manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=None, - ): - image_mtime = 0 - created = parsed_manifest.created_datetime - if created is not None: - image_mtime = calendar.timegm(created.utctimetuple()) - - # ACI Format (.tar): - # manifest - The JSON manifest - # rootfs - The root file system - - # Yield the manifest. - aci_manifest = json.dumps( - DockerV1ToACIManifestTranslator.build_manifest(tag, parsed_manifest, synthetic_image_id) - ) - yield self.tar_file("manifest", aci_manifest.encode("utf-8"), mtime=image_mtime) - - # Yield the merged layer dtaa. - yield self.tar_folder("rootfs", mtime=image_mtime) - - layer_merger = StreamLayerMerger( - tar_stream_getter_iterator, path_prefix="rootfs/", reporter=reporter - ) - for entry in layer_merger.get_generator(): - yield entry - - -class DockerV1ToACIManifestTranslator(object): - @staticmethod - def _build_isolators(docker_config): - """ - Builds ACI isolator config from the docker config. 
- """ - - def _isolate_memory(memory): - return {"name": "memory/limit", "value": {"request": str(memory) + "B",}} - - def _isolate_swap(memory): - return {"name": "memory/swap", "value": {"request": str(memory) + "B",}} - - def _isolate_cpu(cpu): - return {"name": "cpu/shares", "value": {"request": str(cpu),}} - - def _isolate_capabilities(capabilities_set_value): - capabilities_set = re.split(r"[\s,]", capabilities_set_value) - return {"name": "os/linux/capabilities-retain-set", "value": {"set": capabilities_set,}} - - mappers = { - "Memory": _isolate_memory, - "MemorySwap": _isolate_swap, - "CpuShares": _isolate_cpu, - "Cpuset": _isolate_capabilities, - } - - isolators = [] - - for config_key in mappers: - value = docker_config.get(config_key) - if value: - isolators.append(mappers[config_key](value)) - - return isolators - - @staticmethod - def _build_ports(docker_config): - """ - Builds the ports definitions for the ACI. - - Formats: - port/tcp - port/udp - port - """ - ports = [] - - exposed_ports = docker_config["ExposedPorts"] - if exposed_ports is not None: - port_list = list(exposed_ports.keys()) - else: - port_list = docker_config["Ports"] or docker_config["ports"] or [] - - for docker_port in port_list: - protocol = "tcp" - port_number = -1 - - if "/" in docker_port: - (port_number, protocol) = docker_port.split("/") - else: - port_number = docker_port - - try: - port_number = int(port_number) - ports.append( - {"name": "port-%s" % port_number, "port": port_number, "protocol": protocol,} - ) - except ValueError: - pass - - return ports - - @staticmethod - def _ac_name(value): - sanitized = ACNAME_REGEX.sub("-", value.lower()).strip("-") - if sanitized == "": - return str(uuid4()) - return sanitized - - @staticmethod - def _build_volumes(docker_config): - """ - Builds the volumes definitions for the ACI. - """ - volumes = [] - - def get_name(docker_volume_path): - volume_name = DockerV1ToACIManifestTranslator._ac_name(docker_volume_path) - return "volume-%s" % volume_name - - volume_list = docker_config["Volumes"] or docker_config["volumes"] or {} - for docker_volume_path in volume_list.keys(): - if not docker_volume_path: - continue - - volumes.append( - { - "name": get_name(docker_volume_path), - "path": docker_volume_path, - "readOnly": False, - } - ) - return volumes - - @staticmethod - def build_manifest(tag, manifest, synthetic_image_id): - """ - Builds an ACI manifest of an existing repository image. - """ - docker_layer_data = JSONPathDict(json.loads(manifest.leaf_layer.raw_v1_metadata)) - config = docker_layer_data["config"] or JSONPathDict({}) - - namespace = tag.repository.namespace_name - repo_name = tag.repository.name - source_url = "%s://%s/%s/%s:%s" % ( - app.config["PREFERRED_URL_SCHEME"], - app.config["SERVER_HOSTNAME"], - namespace, - repo_name, - tag.name, - ) - - # ACI requires that the execution command be absolutely referenced. Therefore, if we find - # a relative command, we give it as an argument to /bin/sh to resolve and execute for us. - entrypoint = config["Entrypoint"] or [] - exec_path = entrypoint + (config["Cmd"] or []) - if exec_path and not exec_path[0].startswith("/"): - exec_path = ["/bin/sh", "-c", '""%s""' % " ".join(exec_path)] - - # TODO: ACI doesn't support : in the name, so remove any ports. - hostname = app.config["SERVER_HOSTNAME"] - hostname = hostname.split(":", 1)[0] - - # Calculate the environment variables. 
- docker_env_vars = config.get("Env") or [] - env_vars = [] - for var in docker_env_vars: - pieces = var.split("=") - if len(pieces) != 2: - continue - - env_vars.append(pieces) - - manifest = { - "acKind": "ImageManifest", - "acVersion": "0.6.1", - "name": "%s/%s/%s" % (hostname.lower(), namespace.lower(), repo_name.lower()), - "labels": [ - {"name": "version", "value": tag.name,}, - {"name": "arch", "value": docker_layer_data.get("architecture") or "amd64"}, - {"name": "os", "value": docker_layer_data.get("os") or "linux"}, - ], - "app": { - "exec": exec_path, - # Below, `or 'root'` is required to replace empty string from Dockerfiles. - "user": config.get("User") or "root", - "group": config.get("Group") or "root", - "eventHandlers": [], - "workingDirectory": config.get("WorkingDir") or "/", - "environment": [{"name": key, "value": value} for (key, value) in env_vars], - "isolators": DockerV1ToACIManifestTranslator._build_isolators(config), - "mountPoints": DockerV1ToACIManifestTranslator._build_volumes(config), - "ports": DockerV1ToACIManifestTranslator._build_ports(config), - "annotations": [ - {"name": "created", "value": docker_layer_data.get("created") or ""}, - {"name": "homepage", "value": source_url}, - {"name": "quay.io/derived-image", "value": synthetic_image_id}, - ], - }, - } - - return manifest diff --git a/image/appc/test/test_appc.py b/image/appc/test/test_appc.py deleted file mode 100644 index a068a0c4a..000000000 --- a/image/appc/test/test_appc.py +++ /dev/null @@ -1,74 +0,0 @@ -import pytest - -from image.appc import DockerV1ToACIManifestTranslator -from util.dict_wrappers import JSONPathDict - - -EXAMPLE_MANIFEST_OBJ = { - "architecture": "amd64", - "config": { - "Hostname": "1d811a9194c4", - "Domainname": "", - "User": "", - "AttachStdin": False, - "AttachStdout": False, - "AttachStderr": False, - "ExposedPorts": {"2379/tcp": {}, "2380/tcp": {}}, - "Tty": False, - "OpenStdin": False, - "StdinOnce": False, - "Env": ["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"], - "Cmd": ["/usr/local/bin/etcd"], - "ArgsEscaped": True, - "Image": "sha256:4c86d1f362d42420c137846fae31667ee85ce6f2cab406cdff26a8ff8a2c31c4", - "Volumes": None, - "WorkingDir": "", - "Entrypoint": None, - "OnBuild": [], - "Labels": {}, - }, - "container": "5a3565ce9b808a0eb0bcbc966dad624f76ad308ad24e11525b5da1201a1df135", - "container_config": { - "Hostname": "1d811a9194c4", - "Domainname": "", - "User": "", - "AttachStdin": False, - "AttachStdout": False, - "AttachStderr": False, - "ExposedPorts": {"2379/tcp": {}, "2380/tcp": {}}, - "Tty": False, - "OpenStdin": False, - "StdinOnce": False, - "Env": ["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"], - "Cmd": ["/bin/sh", "-c", '#(nop) CMD ["/usr/local/bin/etcd"]'], - "ArgsEscaped": True, - "Image": "sha256:4c86d1f362d42420c137846fae31667ee85ce6f2cab406cdff26a8ff8a2c31c4", - "Volumes": None, - "WorkingDir": "", - "Entrypoint": None, - "OnBuild": [], - "Labels": {}, - }, - "created": "2016-11-11T19:03:55.137387628Z", - "docker_version": "1.11.1", - "id": "3314a3781a526fe728e2e96cfcfb3cc0de901b5c102e6204e8b0155c8f7d5fd2", - "os": "linux", - "parent": "625342ec4d0f3d7a96fd3bb1ef0b4b0b6bc65ebb3d252fd33af0691f7984440e", - "throwaway": True, -} - - -@pytest.mark.parametrize( - "vcfg,expected", - [ - ({"Volumes": None}, []), - ({"Volumes": {}}, []), - ({"Volumes": {"/bin": {}}}, [{"name": "volume-bin", "path": "/bin", "readOnly": False}]), - ({"volumes": None}, []), - ({"volumes": {}}, []), - ({"volumes": {"/bin": {}}}, 
[{"name": "volume-bin", "path": "/bin", "readOnly": False}]), - ], -) -def test_volume_version_easy(vcfg, expected): - output = DockerV1ToACIManifestTranslator._build_volumes(JSONPathDict(vcfg)) - assert output == expected diff --git a/image/common.py b/image/common.py deleted file mode 100644 index 7efd9731f..000000000 --- a/image/common.py +++ /dev/null @@ -1,89 +0,0 @@ -import tarfile -from util.registry.gzipwrap import GzipWrap - - -class TarImageFormatter(object): - """ - Base class for classes which produce a tar containing image and layer data. - """ - - def build_stream( - self, - tag, - manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=None, - ): - """ - Builds and streams a synthetic .tar.gz that represents the formatted tar created by this - class's implementation. - """ - return GzipWrap( - self.stream_generator( - tag, - manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=reporter, - ) - ) - - def stream_generator( - self, - tag, - manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=None, - ): - raise NotImplementedError - - def tar_file(self, name, contents, mtime=None): - """ - Returns the tar binary representation for a file with the given name and file contents. - """ - assert isinstance(contents, bytes) - length = len(contents) - tar_data = self.tar_file_header(name, length, mtime=mtime) - tar_data += contents - tar_data += self.tar_file_padding(length) - return tar_data - - def tar_file_padding(self, length): - """ - Returns tar file padding for file data of the given length. - """ - if length % 512 != 0: - return b"\0" * (512 - (length % 512)) - - return b"" - - def tar_file_header(self, name, file_size, mtime=None): - """ - Returns tar file header data for a file with the given name and size. - """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.REGTYPE - info.size = file_size - - if mtime is not None: - info.mtime = mtime - return info.tobuf() - - def tar_folder(self, name, mtime=None): - """ - Returns tar file header data for a folder with the given name. - """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.DIRTYPE - - if mtime is not None: - info.mtime = mtime - - # allow the directory to be readable by non-root users - info.mode = 0o755 - return info.tobuf() diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 3d1bde8ec..7e541f198 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -220,7 +220,17 @@ class DockerSchema1Manifest(ManifestInterface): Raises a ManifestException on failure. """ - # Already validated. + # Validate the parent image IDs. 
+ encountered_ids = set() + for layer in self.layers: + if layer.v1_metadata.parent_image_id: + if layer.v1_metadata.parent_image_id not in encountered_ids: + raise ManifestException( + "Unknown parent image %s" % layer.v1_metadata.parent_image_id + ) + + if layer.v1_metadata.image_id: + encountered_ids.add(layer.v1_metadata.image_id) @property def is_signed(self): @@ -283,6 +293,10 @@ class DockerSchema1Manifest(ManifestInterface): @property def layers_compressed_size(self): + return sum(l.compressed_size for l in self.layers if l.compressed_size is not None) + + @property + def config_media_type(self): return None @property diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py index b2bfbe757..0d29a8229 100644 --- a/image/docker/schema2/list.py +++ b/image/docker/schema2/list.py @@ -6,7 +6,7 @@ from jsonschema import validate as validate_schema, ValidationError from digest import digest_tools from image.shared import ManifestException -from image.shared.interfaces import ManifestInterface +from image.shared.interfaces import ManifestListInterface from image.shared.schemautil import LazyManifestLoader from image.docker.schema1 import DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE from image.docker.schema1 import DockerSchema1Manifest @@ -53,7 +53,7 @@ class MismatchManifestException(MalformedSchema2ManifestList): pass -class DockerSchema2ManifestList(ManifestInterface): +class DockerSchema2ManifestList(ManifestListInterface): METASCHEMA = { "type": "object", "properties": { @@ -228,6 +228,10 @@ class DockerSchema2ManifestList(ManifestInterface): def layers_compressed_size(self): return None + @property + def config_media_type(self): + return None + @lru_cache(maxsize=1) def manifests(self, content_retriever): """ @@ -249,6 +253,20 @@ class DockerSchema2ManifestList(ManifestInterface): for m in manifests ] + @property + def amd64_linux_manifest_digest(self): + """ Returns the digest of the AMD64+Linux manifest in this list, if any, or None + if none. + """ + for manifest_ref in self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY]: + platform = manifest_ref[DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY] + architecture = platform[DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY] + os = platform[DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY] + if architecture == "amd64" and os == "linux": + return manifest_ref[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY] + + return None + def validate(self, content_retriever): """ Performs validation of required assertions about the manifest. diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index 8716851a4..6a244a56f 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -172,7 +172,7 @@ class DockerSchema2Manifest(ManifestInterface): Raises a ManifestException on failure. """ - # Nothing to validate. 
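A standalone sketch of the parent-ID check added to DockerSchema1Manifest.validate above, assuming layers are iterated base image first (so every parent must already have been seen); the image IDs come from the schema1 test manifest.

def check_parent_chain(layers):
    # layers: iterable of (image_id, parent_image_id) pairs, base image first.
    encountered_ids = set()
    for image_id, parent_image_id in layers:
        if parent_image_id and parent_image_id not in encountered_ids:
            raise ValueError("Unknown parent image %s" % parent_image_id)
        if image_id:
            encountered_ids.add(image_id)

check_parent_chain([("anotherid", None), ("someid", "anotherid")])  # valid chain
# check_parent_chain([("someid", "missingid")])  # would raise: unknown parent "missingid"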
+ self._get_built_config(content_retriever) @property def is_manifest_list(self): @@ -222,6 +222,12 @@ class DockerSchema2Manifest(ManifestInterface): def layers_compressed_size(self): return sum(layer.compressed_size for layer in self.filesystem_layers) + @property + def config_media_type(self): + return self._parsed[DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY][ + DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY + ] + @property def has_remote_layer(self): for layer in self.filesystem_layers: diff --git a/image/docker/schema2/test/test_list.py b/image/docker/schema2/test/test_list.py index 568a67521..88e70fcd3 100644 --- a/image/docker/schema2/test/test_list.py +++ b/image/docker/schema2/test/test_list.py @@ -50,7 +50,7 @@ MANIFESTLIST_BYTES = json.dumps( }, { "mediaType": "application/vnd.docker.distribution.manifest.v1+json", - "size": 878, + "size": 1051, "digest": "sha256:5b", "platform": {"architecture": "amd64", "os": "linux", "features": ["sse4"]}, }, @@ -84,6 +84,8 @@ def test_valid_manifestlist(): assert manifestlist.bytes.as_encoded_str() == MANIFESTLIST_BYTES assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES) assert manifestlist.get_layers(retriever) is None + assert manifestlist.config_media_type is None + assert manifestlist.layers_compressed_size is None assert not manifestlist.blob_digests for index, manifest in enumerate(manifestlist.manifests(retriever)): @@ -114,6 +116,8 @@ def test_valid_manifestlist(): # Ensure it validates. manifestlist.validate(retriever) + assert manifestlist.amd64_linux_manifest_digest == "sha256:5b" + def test_get_schema1_manifest_no_matching_list(): manifestlist = DockerSchema2ManifestList(Bytes.for_string_or_unicode(NO_AMD_MANIFESTLIST_BYTES)) @@ -121,6 +125,7 @@ def test_get_schema1_manifest_no_matching_list(): assert manifestlist.media_type == "application/vnd.docker.distribution.manifest.list.v2+json" assert manifestlist.bytes.as_encoded_str() == NO_AMD_MANIFESTLIST_BYTES + assert manifestlist.amd64_linux_manifest_digest is None compatible_manifest = manifestlist.get_schema1_manifest("foo", "bar", "baz", retriever) assert compatible_manifest is None @@ -130,10 +135,22 @@ def test_builder(): existing = DockerSchema2ManifestList(Bytes.for_string_or_unicode(MANIFESTLIST_BYTES)) builder = DockerSchema2ManifestListBuilder() for index, manifest in enumerate(existing.manifests(retriever)): - builder.add_manifest(manifest.manifest_obj, "amd64", "os") + builder.add_manifest(manifest.manifest_obj, "amd64", "linux") built = builder.build() assert len(built.manifests(retriever)) == 2 + assert built.amd64_linux_manifest_digest is not None + + +def test_builder_no_amd(): + existing = DockerSchema2ManifestList(Bytes.for_string_or_unicode(MANIFESTLIST_BYTES)) + builder = DockerSchema2ManifestListBuilder() + for index, manifest in enumerate(existing.manifests(retriever)): + builder.add_manifest(manifest.manifest_obj, "intel386", "os") + + built = builder.build() + assert len(built.manifests(retriever)) == 2 + assert built.amd64_linux_manifest_digest is None def test_invalid_manifestlist(): diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index ca1d4e7f7..c5a50ffe2 100644 --- a/image/docker/schema2/test/test_manifest.py +++ b/image/docker/schema2/test/test_manifest.py @@ -119,6 +119,8 @@ def test_valid_manifest(): assert manifest.media_type == "application/vnd.docker.distribution.manifest.v2+json" assert not manifest.has_remote_layer assert manifest.has_legacy_image + assert manifest.config_media_type == 
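The selection behind amd64_linux_manifest_digest can be sketched on a plain manifest-list dictionary; the keys follow the schema2 manifest-list format used in the tests above.

def amd64_linux_digest(manifest_list_dict):
    # Return the digest of the first amd64/linux entry, or None if there is none.
    for manifest_ref in manifest_list_dict.get("manifests", []):
        platform = manifest_ref["platform"]
        if platform["architecture"] == "amd64" and platform["os"] == "linux":
            return manifest_ref["digest"]
    return None

assert amd64_linux_digest(
    {"manifests": [{"digest": "sha256:5b", "platform": {"architecture": "amd64", "os": "linux"}}]}
) == "sha256:5b"
assert amd64_linux_digest({"manifests": []}) is None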
"application/vnd.docker.container.image.v1+json" + assert manifest.layers_compressed_size == 123721 retriever = ContentRetrieverForTesting.for_config( { @@ -171,6 +173,8 @@ def test_valid_remote_manifest(): ) assert manifest.media_type == "application/vnd.docker.distribution.manifest.v2+json" assert manifest.has_remote_layer + assert manifest.config_media_type == "application/vnd.docker.container.image.v1+json" + assert manifest.layers_compressed_size == 123721 assert len(manifest.filesystem_layers) == 4 assert manifest.filesystem_layers[0].compressed_size == 1234 diff --git a/image/docker/squashed.py b/image/docker/squashed.py deleted file mode 100644 index f4927f378..000000000 --- a/image/docker/squashed.py +++ /dev/null @@ -1,149 +0,0 @@ -import copy -import json -import math -import calendar - -from app import app -from image.common import TarImageFormatter -from util.registry.gzipwrap import GZIP_BUFFER_SIZE -from util.registry.streamlayerformat import StreamLayerMerger - - -class FileEstimationException(Exception): - """ - Exception raised by build_docker_load_stream if the estimated size of the layer tar was lower - than the actual size. - - This means the sent tar header is wrong, and we have to fail. - """ - - pass - - -class SquashedDockerImageFormatter(TarImageFormatter): - """ - Image formatter which produces a squashed image compatible with the `docker load` command. - """ - - # Multiplier against the image size reported by Docker to account for the tar metadata. - # Note: This multiplier was not formally calculated in anyway and should be adjusted overtime - # if/when we encounter issues with it. Unfortunately, we cannot make it too large or the Docker - # daemon dies when trying to load the entire tar into memory. - SIZE_MULTIPLIER = 1.2 - - def stream_generator( - self, - tag, - parsed_manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=None, - ): - image_mtime = 0 - created = parsed_manifest.created_datetime - if created is not None: - image_mtime = calendar.timegm(created.utctimetuple()) - - # Docker import V1 Format (.tar): - # repositories - JSON file containing a repo -> tag -> image map - # {image ID folder}: - # json - The layer JSON - # layer.tar - The tarballed contents of the layer - # VERSION - The docker import version: '1.0' - layer_merger = StreamLayerMerger(tar_stream_getter_iterator, reporter=reporter) - - # Yield the repositories file: - synthetic_layer_info = {} - synthetic_layer_info[tag.name + ".squash"] = synthetic_image_id - - hostname = app.config["SERVER_HOSTNAME"] - repositories = {} - namespace = tag.repository.namespace_name - repository = tag.repository.name - repositories[hostname + "/" + namespace + "/" + repository] = synthetic_layer_info - - yield self.tar_file( - "repositories", json.dumps(repositories).encode("utf-8"), mtime=image_mtime - ) - - # Yield the image ID folder. - yield self.tar_folder(synthetic_image_id, mtime=image_mtime) - - # Yield the JSON layer data. - layer_json = SquashedDockerImageFormatter._build_layer_json( - parsed_manifest, synthetic_image_id - ) - yield self.tar_file( - synthetic_image_id + "/json", json.dumps(layer_json).encode("utf-8"), mtime=image_mtime - ) - - # Yield the VERSION file. - yield self.tar_file(synthetic_image_id + "/VERSION", b"1.0", mtime=image_mtime) - - # Yield the merged layer data's header. 
- estimated_file_size = 0 - for layer in layer_iterator: - estimated_file_size += layer.estimated_size( - SquashedDockerImageFormatter.SIZE_MULTIPLIER - ) - - # Make sure the estimated file size is an integer number of bytes. - estimated_file_size = int(math.ceil(estimated_file_size)) - - yield self.tar_file_header( - synthetic_image_id + "/layer.tar", estimated_file_size, mtime=image_mtime - ) - - # Yield the contents of the merged layer. - yielded_size = 0 - for entry in layer_merger.get_generator(): - yield entry - yielded_size += len(entry) - - # If the yielded size is more than the estimated size (which is unlikely but possible), then - # raise an exception since the tar header will be wrong. - if yielded_size > estimated_file_size: - leaf_image_id = parsed_manifest.leaf_layer_v1_image_id - message = "For %s/%s:%s (%s:%s): Expected %s bytes, found %s bytes" % ( - namespace, - repository, - tag, - parsed_manifest.digest, - leaf_image_id, - estimated_file_size, - yielded_size, - ) - raise FileEstimationException(message) - - # If the yielded size is less than the estimated size (which is likely), fill the rest with - # zeros. - if yielded_size < estimated_file_size: - to_yield = estimated_file_size - yielded_size - while to_yield > 0: - yielded = min(to_yield, GZIP_BUFFER_SIZE) - yield b"\0" * yielded - to_yield -= yielded - - # Yield any file padding to 512 bytes that is necessary. - yield self.tar_file_padding(estimated_file_size) - - # Last two records are empty in tar spec. - yield b"\0" * 512 - yield b"\0" * 512 - - @staticmethod - def _build_layer_json(manifest, synthetic_image_id): - updated_json = json.loads(manifest.leaf_layer.raw_v1_metadata) - updated_json["id"] = synthetic_image_id - - if "parent" in updated_json: - del updated_json["parent"] - - if "config" in updated_json and "Image" in updated_json["config"]: - updated_json["config"]["Image"] = synthetic_image_id - - if "container_config" in updated_json and "Image" in updated_json["container_config"]: - updated_json["container_config"]["Image"] = synthetic_image_id - - return updated_json diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index b86270e97..c9e27a936 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -37,10 +37,12 @@ MANIFEST_BYTES = json.dumps( "tag": "latest", "architecture": "amd64", "fsLayers": [ + {"blobSum": "sha256:cd8567d70002e957612902a8e985ea129d831ebe04057d88fb644857caa45d11"}, {"blobSum": "sha256:cc8567d70002e957612902a8e985ea129d831ebe04057d88fb644857caa45d11"}, {"blobSum": "sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef"}, ], "history": [ + {"v1Compatibility": '{"id":"sizedid", "parent": "someid", "Size": 1234}'}, {"v1Compatibility": '{"id":"someid", "parent": "anotherid"}'}, {"v1Compatibility": '{"id":"anotherid"}'}, ], @@ -71,10 +73,12 @@ def test_valid_manifest(): assert manifest.namespace == "" assert manifest.repo_name == "hello-world" assert manifest.tag == "latest" - assert manifest.image_ids == {"someid", "anotherid"} - assert manifest.parent_image_ids == {"anotherid"} + assert manifest.image_ids == {"sizedid", "someid", "anotherid"} + assert manifest.parent_image_ids == {"someid", "anotherid"} + assert manifest.layers_compressed_size == 1234 + assert manifest.config_media_type is None - assert len(manifest.layers) == 2 + assert len(manifest.layers) == 3 assert manifest.layers[0].v1_metadata.image_id == "anotherid" assert manifest.layers[0].v1_metadata.parent_image_id is None @@ -82,10 
+86,14 @@ def test_valid_manifest(): assert manifest.layers[1].v1_metadata.image_id == "someid" assert manifest.layers[1].v1_metadata.parent_image_id == "anotherid" + assert manifest.layers[2].v1_metadata.image_id == "sizedid" + assert manifest.layers[2].v1_metadata.parent_image_id == "someid" + assert manifest.layers[0].compressed_size is None assert manifest.layers[1].compressed_size is None + assert manifest.layers[2].compressed_size == 1234 - assert manifest.leaf_layer == manifest.layers[1] + assert manifest.leaf_layer == manifest.layers[2] assert manifest.created_datetime is None unsigned = manifest.unsigned() @@ -97,8 +105,8 @@ def test_valid_manifest(): assert unsigned.digest != manifest.digest image_layers = list(manifest.get_layers(None)) - assert len(image_layers) == 2 - for index in range(0, 2): + assert len(image_layers) == 3 + for index in range(0, 3): assert image_layers[index].layer_id == manifest.layers[index].v1_metadata.image_id assert image_layers[index].blob_digest == manifest.layers[index].digest assert image_layers[index].command == manifest.layers[index].v1_metadata.command diff --git a/image/oci/index.py b/image/oci/index.py index fa6bd341a..1aff53f8b 100644 --- a/image/oci/index.py +++ b/image/oci/index.py @@ -41,7 +41,7 @@ from jsonschema import validate as validate_schema, ValidationError from digest import digest_tools from image.shared import ManifestException -from image.shared.interfaces import ManifestInterface +from image.shared.interfaces import ManifestListInterface from image.shared.schemautil import LazyManifestLoader from image.oci import OCI_IMAGE_INDEX_CONTENT_TYPE, OCI_IMAGE_MANIFEST_CONTENT_TYPE from image.oci.descriptor import get_descriptor_schema @@ -81,7 +81,7 @@ class MalformedIndex(ManifestException): pass -class OCIIndex(ManifestInterface): +class OCIIndex(ManifestListInterface): METASCHEMA = { "type": "object", "properties": { @@ -227,6 +227,10 @@ class OCIIndex(ManifestInterface): def layers_compressed_size(self): return None + @property + def config_media_type(self): + return None + @lru_cache(maxsize=1) def manifests(self, content_retriever): """ @@ -275,6 +279,20 @@ class OCIIndex(ManifestInterface): def has_legacy_image(self): return False + @property + def amd64_linux_manifest_digest(self): + """ Returns the digest of the AMD64+Linux manifest in this list, if any, or None + if none. 
+ """ + for manifest_ref in self._parsed[INDEX_MANIFESTS_KEY]: + platform = manifest_ref[INDEX_PLATFORM_KEY] + architecture = platform.get(INDEX_ARCHITECTURE_KEY, None) + os = platform.get(INDEX_OS_KEY, None) + if architecture == "amd64" and os == "linux": + return manifest_ref[INDEX_DIGEST_KEY] + + return None + def get_requires_empty_layer_blob(self, content_retriever): return False diff --git a/image/oci/manifest.py b/image/oci/manifest.py index 909c75263..b3e44b1c4 100644 --- a/image/oci/manifest.py +++ b/image/oci/manifest.py @@ -197,6 +197,10 @@ class OCIManifest(ManifestInterface): """ return self.filesystem_layers[-1] + @property + def config_media_type(self): + return self._parsed[OCI_MANIFEST_CONFIG_KEY][OCI_MANIFEST_MEDIATYPE_KEY] + @property def layers_compressed_size(self): return sum(layer.compressed_size for layer in self.filesystem_layers) diff --git a/image/oci/test/test_oci_index.py b/image/oci/test/test_oci_index.py index 84c8d747c..0825df0af 100644 --- a/image/oci/test/test_oci_index.py +++ b/image/oci/test/test_oci_index.py @@ -34,6 +34,35 @@ SAMPLE_INDEX = """{ }""" +SAMPLE_INDEX_NO_AMD = """{ + "schemaVersion": 2, + "manifests": [ + { + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "size": 7143, + "digest": "sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f", + "platform": { + "architecture": "ppc64le", + "os": "linux" + } + }, + { + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "size": 7682, + "digest": "sha256:5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270", + "platform": { + "architecture": "intel386", + "os": "linux" + } + } + ], + "annotations": { + "com.example.key1": "value1", + "com.example.key2": "value2" + } +}""" + + def test_parse_basic_index(): index = OCIIndex(Bytes.for_string_or_unicode(SAMPLE_INDEX)) assert index.is_manifest_list @@ -43,6 +72,10 @@ def test_parse_basic_index(): "sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f", "sha256:5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270", ] + assert ( + index.amd64_linux_manifest_digest + == "sha256:5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270" + ) def test_config_missing_required(): @@ -56,3 +89,15 @@ def test_config_missing_required(): def test_invalid_index(): with pytest.raises(MalformedIndex): OCIIndex(Bytes.for_string_or_unicode("{}")) + + +def test_index_without_amd(): + index = OCIIndex(Bytes.for_string_or_unicode(SAMPLE_INDEX_NO_AMD)) + assert index.is_manifest_list + assert index.digest == "sha256:a0ed0f2b3949bc731063320667062307faf4245f6872dc5bc98ee6ea5443f169" + assert index.local_blob_digests == [] + assert index.child_manifest_digests() == [ + "sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f", + "sha256:5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270", + ] + assert index.amd64_linux_manifest_digest is None diff --git a/image/shared/interfaces.py b/image/shared/interfaces.py index 661f840ae..bd158cb62 100644 --- a/image/shared/interfaces.py +++ b/image/shared/interfaces.py @@ -56,6 +56,12 @@ class ManifestInterface(object): Returns None if this cannot be computed locally. """ + @abstractproperty + def config_media_type(self): + """ Returns the media type of the config of this manifest or None if + this manifest does not support a configuration type. 
+ """ + @abstractmethod def validate(self, content_retriever): """ @@ -184,6 +190,19 @@ class ManifestInterface(object): """ +@add_metaclass(ABCMeta) +class ManifestListInterface(object): + """ + Defines the interface for the various manifest list types supported. + """ + + @abstractmethod + def amd64_linux_manifest_digest(self): + """ Returns the digest of the AMD64+Linux manifest in this list, if any, or None + if none. + """ + + @add_metaclass(ABCMeta) class ContentRetriever(object): """ diff --git a/initdb.py b/initdb.py index cd7802ffa..bd17a8481 100644 --- a/initdb.py +++ b/initdb.py @@ -174,6 +174,7 @@ def __create_manifest_and_tags( config = { "id": current_id, + "Size": len(content), } if parent_id: config["parent"] = parent_id @@ -1239,6 +1240,8 @@ WHITELISTED_EMPTY_MODELS = [ "LogEntry", "LogEntry2", "ManifestSecurityStatus", + "ManifestLegacyImage", + "Image", ] diff --git a/requirements-nover.txt b/requirements-nover.txt index 1450b97c6..0f2aa6565 100644 --- a/requirements-nover.txt +++ b/requirements-nover.txt @@ -34,6 +34,7 @@ geoip2 gevent gipc gunicorn +hashids hiredis html5lib==0.9999999 # pinned due to xhtml2pdf httmock diff --git a/requirements.txt b/requirements.txt index 0f85f228a..29c22d298 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,9 +68,9 @@ futures==3.1.1 geoip2==3.0.0 gevent==1.4.0 gipc==1.0.1 -gpg==1.10.0 greenlet==0.4.15 gunicorn==20.0.4 +hashids==1.2.0 hiredis==1.0.1 html5lib==1.0.1 httmock==1.3.0 diff --git a/static/directives/repo-view/image-tag-tooltip.html b/static/directives/repo-view/image-tag-tooltip.html deleted file mode 100644 index 3b1051d06..000000000 --- a/static/directives/repo-view/image-tag-tooltip.html +++ /dev/null @@ -1,11 +0,0 @@ -
-
- Image {{ tag.image_id.substr(0, 12) }} -
-
    -
  • {{ tag.name }}
  • -
-
and {{ imageMap[tag.image_id].length - 5 }} more tags
-
\ No newline at end of file diff --git a/static/directives/repo-view/manifest-tag-tooltip.html b/static/directives/repo-view/manifest-tag-tooltip.html new file mode 100644 index 000000000..73c204bc1 --- /dev/null +++ b/static/directives/repo-view/manifest-tag-tooltip.html @@ -0,0 +1,11 @@ +
+
+ Manifest {{ tag.manifest_digest.substr(7, 12) }} +
+
    +
  • {{ tag.name }}
  • +
+
and {{ manifestMap[tag.manifest_digest].length - 5 }} more tags
+
\ No newline at end of file diff --git a/static/directives/repo-view/repo-panel-tags.html b/static/directives/repo-view/repo-panel-tags.html index 935f46688..a2a0c9d0c 100644 --- a/static/directives/repo-view/repo-panel-tags.html +++ b/static/directives/repo-view/repo-panel-tags.html @@ -32,9 +32,9 @@ Commit SHAs -
- {{ ::it.image_id.substr(0, 12) }} +
+
@@ -116,16 +116,16 @@ style="width: 140px;"> Expires - - Manifest + Manifest - + @@ -167,14 +167,6 @@ See Child Manifests - - - - Unsupported - - @@ -198,11 +190,11 @@ + ng-if="manifestTracks.length > maxTrackCount"> - + + ng-if="::getTrackEntryForIndex(mt, $parent.$parent.$index)" + ng-class="::trackLineClass(mt, $parent.$parent.$parent.$index)" + ng-style="::{'borderColor': getTrackEntryForIndex(mt, $parent.$parent.$parent.$index).color}"> - + + ng-if="::getTrackEntryForIndex(mt, $parent.$parent.$parent.$parent.$index)" + ng-class="::trackLineExpandedClass(mt, $parent.$parent.$parent.$parent.$parent.$index)" + ng-style="::{'borderColor': getTrackEntryForIndex(mt, $parent.$parent.$parent.$parent.$parent.$index).color}"> @@ -320,12 +312,12 @@
- + + ng-if="::getTrackEntryForIndex(mt, $parent.$parent.$index)" + ng-class="::trackLineExpandedClass(mt, $parent.$parent.$parent.$index)" + ng-style="::{'borderColor': getTrackEntryForIndex(mt, $parent.$parent.$parent.$index).color}"> diff --git a/static/js/directives/repo-view/repo-panel-tags.js b/static/js/directives/repo-view/repo-panel-tags.js index 95dc671ed..580d3795c 100644 --- a/static/js/directives/repo-view/repo-panel-tags.js +++ b/static/js/directives/repo-view/repo-panel-tags.js @@ -89,78 +89,78 @@ angular.module('quay').directive('repoPanelTags', function () { } // Sort the tags by the predicate and the reverse, and map the information. - var imageIDs = []; var ordered = TableService.buildOrderedItems(allTags, $scope.options, - ['name'], ['last_modified_datetime', 'size']).entries; + ['name', 'manifest_digest'], ['last_modified_datetime', 'size']).entries; var checked = []; - var imageMap = {}; - var imageIndexMap = {}; + var manifestMap = {}; + var manifestIndexMap = {}; + var manifestDigests = []; for (var i = 0; i < ordered.length; ++i) { var tagInfo = ordered[i]; - if (!tagInfo.image_id) { + if (!tagInfo.manifest_digest) { continue; } - if (!imageMap[tagInfo.image_id]) { - imageMap[tagInfo.image_id] = []; - imageIDs.push(tagInfo.image_id) + if (!manifestMap[tagInfo.manifest_digest]) { + manifestMap[tagInfo.manifest_digest] = []; + manifestDigests.push(tagInfo.manifest_digest) } - imageMap[tagInfo.image_id].push(tagInfo); + manifestMap[tagInfo.manifest_digest].push(tagInfo); if ($.inArray(tagInfo.name, $scope.selectedTags) >= 0) { checked.push(tagInfo); } - if (!imageIndexMap[tagInfo.image_id]) { - imageIndexMap[tagInfo.image_id] = {'start': i, 'end': i}; + if (!manifestIndexMap[tagInfo.manifest_digest]) { + manifestIndexMap[tagInfo.manifest_digest] = {'start': i, 'end': i}; } - imageIndexMap[tagInfo.image_id]['end'] = i; + manifestIndexMap[tagInfo.manifest_digest]['end'] = i; }; // Calculate the image tracks. var colors = d3.scale.category10(); - if (Object.keys(imageMap).length > 10) { + if (Object.keys(manifestMap).length > 10) { colors = d3.scale.category20(); } - var imageTracks = []; - var imageTrackEntries = []; - var trackEntryForImage = {}; + var manifestTracks = []; + var manifestTrackEntries = []; + var trackEntryForManifest = {}; var visibleStartIndex = ($scope.options.page * $scope.tagsPerPage); var visibleEndIndex = (($scope.options.page + 1) * $scope.tagsPerPage); - imageIDs.sort().map(function(image_id) { - if (imageMap[image_id].length >= 2){ + manifestDigests.sort().map(function(manifest_digest) { + if (manifestMap[manifest_digest].length >= 2){ // Create the track entry. 
- var imageIndexRange = imageIndexMap[image_id]; - var colorIndex = imageTrackEntries.length; + var manifestIndexRange = manifestIndexMap[manifest_digest]; + var colorIndex = manifestTrackEntries.length; var trackEntry = { - 'image_id': image_id, + 'manifest_digest': manifest_digest, 'color': colors(colorIndex), - 'count': imageMap[image_id].length, - 'tags': imageMap[image_id], - 'index_range': imageIndexRange, - 'visible': visibleStartIndex <= imageIndexRange.end && imageIndexRange.start <= visibleEndIndex, + 'count': manifestMap[manifest_digest].length, + 'tags': manifestMap[manifest_digest], + 'index_range': manifestIndexRange, + 'visible': visibleStartIndex <= manifestIndexRange.end && manifestIndexRange.start <= visibleEndIndex, }; - trackEntryForImage[image_id] = trackEntry; - imageMap[image_id]['color'] = colors(colorIndex); + trackEntryForManifest[manifest_digest] = trackEntry; + manifestMap[manifest_digest]['color'] = colors(colorIndex); // Find the track in which we can place the entry, if any. var existingTrack = null; - for (var i = 0; i < imageTracks.length; ++i) { + for (var i = 0; i < manifestTracks.length; ++i) { // For the current track, ensure that the start and end index // for the current entry is outside of the range of the track's // entries. If so, then we can add the entry to the track. - var currentTrack = imageTracks[i]; + var currentTrack = manifestTracks[i]; var canAddToCurrentTrack = true; for (var j = 0; j < currentTrack.entries.length; ++j) { var currentTrackEntry = currentTrack.entries[j]; - var entryInfo = imageIndexMap[currentTrackEntry.image_id]; - if (Math.max(entryInfo.start, imageIndexRange.start) <= Math.min(entryInfo.end, imageIndexRange.end)) { + var entryInfo = manifestIndexMap[currentTrackEntry.manifest_digest]; + if (Math.max(entryInfo.start, manifestIndexRange.start) <= Math.min(entryInfo.end, manifestIndexRange.end)) { canAddToCurrentTrack = false; break; } @@ -175,38 +175,38 @@ angular.module('quay').directive('repoPanelTags', function () { // Add the entry to the track or create a new track if necessary.
if (existingTrack) { existingTrack.entries.push(trackEntry) - existingTrack.entryByImageId[image_id] = trackEntry; - existingTrack.endIndex = Math.max(existingTrack.endIndex, imageIndexRange.end); + existingTrack.entryByManifestDigest[manifest_digest] = trackEntry; + existingTrack.endIndex = Math.max(existingTrack.endIndex, manifestIndexRange.end); - for (var j = imageIndexRange.start; j <= imageIndexRange.end; j++) { + for (var j = manifestIndexRange.start; j <= manifestIndexRange.end; j++) { existingTrack.entryByIndex[j] = trackEntry; } } else { - var entryByImageId = {}; - entryByImageId[image_id] = trackEntry; + var entryByManifestDigest = {}; + entryByManifestDigest[manifest_digest] = trackEntry; var entryByIndex = {}; - for (var j = imageIndexRange.start; j <= imageIndexRange.end; j++) { + for (var j = manifestIndexRange.start; j <= manifestIndexRange.end; j++) { entryByIndex[j] = trackEntry; } - imageTracks.push({ + manifestTracks.push({ 'entries': [trackEntry], - 'entryByImageId': entryByImageId, - 'startIndex': imageIndexRange.start, - 'endIndex': imageIndexRange.end, + 'entryByManifestDigest': entryByManifestDigest, + 'startIndex': manifestIndexRange.start, + 'endIndex': manifestIndexRange.end, 'entryByIndex': entryByIndex, }); } - imageTrackEntries.push(trackEntry); + manifestTrackEntries.push(trackEntry); } }); - $scope.imageMap = imageMap; - $scope.imageTracks = imageTracks; - $scope.imageTrackEntries = imageTrackEntries; - $scope.trackEntryForImage = trackEntryForImage; + $scope.manifestMap = manifestMap; + $scope.manifestTracks = manifestTracks; + $scope.manifestTrackEntries = manifestTrackEntries; + $scope.trackEntryForManifest = trackEntryForManifest; $scope.options.page = 0; @@ -241,7 +241,7 @@ angular.module('quay').directive('repoPanelTags', function () { }); $scope.$watch('selectedTags', function(selectedTags) { - if (!selectedTags || !$scope.repository || !$scope.imageMap) { return; } + if (!selectedTags || !$scope.repository || !$scope.manifestMap) { return; } $scope.checkedTags.setChecked(selectedTags.map(function(tag) { return $scope.repositoryTags[tag]; @@ -410,8 +410,8 @@ angular.module('quay').directive('repoPanelTags', function () { return false; }; - $scope.imageIDFilter = function(image_id, tag) { - return tag.image_id == image_id; + $scope.manifestDigestFilter = function(manifest_digest, tag) { + return tag.manifest_digest == manifest_digest; }; $scope.setTab = function(tab) { @@ -420,7 +420,7 @@ angular.module('quay').directive('repoPanelTags', function () { $scope.selectTrack = function(it) { $scope.checkedTags.checkByFilter(function(tag) { - return $scope.imageIDFilter(it.image_id, tag); + return $scope.manifestDigestFilter(it.manifest_digest, tag); }); }; diff --git a/test/fixtures.py b/test/fixtures.py index ff235f6d0..762489de8 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -26,7 +26,6 @@ from endpoints.appr import appr_bp from endpoints.web import web from endpoints.v1 import v1_bp from endpoints.v2 import v2_bp -from endpoints.verbs import verbs as verbs_bp from endpoints.webhooks import webhooks from initdb import initialize_database, populate_database @@ -312,7 +311,6 @@ def app(appconfig, initialized_db): app.register_blueprint(api_bp, url_prefix="/api") app.register_blueprint(appr_bp, url_prefix="/cnr") app.register_blueprint(web, url_prefix="/") - app.register_blueprint(verbs_bp, url_prefix="/c1") app.register_blueprint(v1_bp, url_prefix="/v1") app.register_blueprint(v2_bp, url_prefix="/v2") app.register_blueprint(webhooks, 
url_prefix="/webhooks") diff --git a/test/registry/fixtures.py b/test/registry/fixtures.py index 2132c251b..106b53230 100644 --- a/test/registry/fixtures.py +++ b/test/registry/fixtures.py @@ -16,9 +16,8 @@ from app import storage from data.database import ( close_db_filter, configure, - DerivedStorageForImage, QueueItem, - Image, + ImageStorage, TagManifest, TagManifestToManifest, Manifest, @@ -30,6 +29,7 @@ from data.database import ( from data import model from data.registry_model import registry_model from endpoints.csrf import generate_csrf_token +from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST from util.log import logfile_path from test.registry.liveserverfixture import LiveServerExecutor @@ -46,15 +46,22 @@ def registry_server_executor(app): ) return "OK" - def delete_image(image_id): - image = Image.get(docker_image_id=image_id) - image.docker_image_id = "DELETED" - image.save() - return "OK" + def verify_replication_for(namespace, repo_name, tag_name): + repo_ref = registry_model.lookup_repository(namespace, repo_name) + assert repo_ref + + tag = registry_model.get_repo_tag(repo_ref, tag_name) + assert tag + + manifest = registry_model.get_manifest_for_tag(tag) + assert manifest + + for layer in registry_model.list_manifest_layers(manifest, storage): + if layer.blob.digest != EMPTY_LAYER_BLOB_DIGEST: + QueueItem.select().where( + QueueItem.queue_name ** ("%" + layer.blob.uuid + "%") + ).get() - def get_storage_replication_entry(image_id): - image = Image.get(docker_image_id=image_id) - QueueItem.select().where(QueueItem.queue_name ** ("%" + image.storage.uuid + "%")).get() return "OK" def set_feature(feature_name, value): @@ -81,10 +88,6 @@ def registry_server_executor(app): return jsonify({"old_value": old_value}) - def clear_derived_cache(): - DerivedStorageForImage.delete().execute() - return "OK" - def clear_uncompressed_size(image_id): image = model.image.get_image_by_id("devtable", "newrepo", image_id) image.storage.uncompressed_size = None @@ -158,11 +161,9 @@ def registry_server_executor(app): executor = LiveServerExecutor() executor.register("generate_csrf", generate_csrf) executor.register("set_supports_direct_download", set_supports_direct_download) - executor.register("delete_image", delete_image) - executor.register("get_storage_replication_entry", get_storage_replication_entry) + executor.register("verify_replication_for", verify_replication_for) executor.register("set_feature", set_feature) executor.register("set_config_key", set_config_key) - executor.register("clear_derived_cache", clear_derived_cache) executor.register("clear_uncompressed_size", clear_uncompressed_size) executor.register("add_token", add_token) executor.register("break_database", break_database) diff --git a/test/registry/protocol_v1.py b/test/registry/protocol_v1.py index 5f8d1ca6b..07a4e7cb8 100644 --- a/test/registry/protocol_v1.py +++ b/test/registry/protocol_v1.py @@ -153,6 +153,9 @@ class V1Protocol(RegistryProtocol): assert expected_failure == Failures.UNKNOWN_TAG return None + if expected_failure == Failures.UNKNOWN_TAG: + return None + tag_image_id = image_ids[tag_name] assert image_id_data.json() == tag_image_id @@ -331,7 +334,7 @@ class V1Protocol(RegistryProtocol): namespace, repo_name, tag_name, - image, + image_id, credentials=None, expected_failure=None, options=None, @@ -341,7 +344,7 @@ class V1Protocol(RegistryProtocol): session, "PUT", "/v1/repositories/%s/tags/%s" % (self.repo_name(namespace, repo_name), tag_name), - data='"%s"' % image.id, + data='"%s"' % image_id, 
auth=auth, expected_status=(200, expected_failure, V1ProtocolSteps.PUT_TAG), ) diff --git a/test/registry/registry_tests.py b/test/registry/registry_tests.py index 681c807d7..5052552c2 100644 --- a/test/registry/registry_tests.py +++ b/test/registry/registry_tests.py @@ -835,10 +835,11 @@ def test_image_replication( credentials=credentials, ) - # Ensure that entries were created for each image. - for image_id in list(result.image_ids.values()): - r = registry_server_executor.on(liveserver).get_storage_replication_entry(image_id) - assert r.text == "OK" + # Ensure that entries were created for each layer. + r = registry_server_executor.on(liveserver).verify_replication_for( + "devtable", "newrepo", "latest" + ) + assert r.text == "OK" def test_image_replication_empty_layers( @@ -872,10 +873,11 @@ def test_image_replication_empty_layers( credentials=credentials, ) - # Ensure that entries were created for each image. - for image_id in list(result.image_ids.values()): - r = registry_server_executor.on(liveserver).get_storage_replication_entry(image_id) - assert r.text == "OK" + # Ensure that entries were created for each layer. + r = registry_server_executor.on(liveserver).verify_replication_for( + "devtable", "newrepo", "latest" + ) + assert r.text == "OK" @pytest.mark.parametrize( @@ -1615,333 +1617,6 @@ def test_tags_disabled_namespace( ) -def test_squashed_image_disabled_namespace( - pusher, sized_images, liveserver_session, liveserver, registry_server_executor, app_reloader -): - """ Test: Attempting to pull a squashed image from a disabled namespace. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, "buynlarge", "newrepo", "latest", sized_images, credentials=credentials - ) - - # Disable the buynlarge namespace. - registry_server_executor.on(liveserver).disable_namespace("buynlarge") - - # Attempt to pull the squashed version. - response = liveserver_session.get("/c1/squash/buynlarge/newrepo/latest", auth=credentials) - assert response.status_code == 400 - - -def test_squashed_image_disabled_user( - pusher, sized_images, liveserver_session, liveserver, registry_server_executor, app_reloader -): - """ Test: Attempting to pull a squashed image via a disabled user. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, "buynlarge", "newrepo", "latest", sized_images, credentials=credentials - ) - - # Disable the devtable namespace. - registry_server_executor.on(liveserver).disable_namespace("devtable") - - # Attempt to pull the squashed version. - response = liveserver_session.get("/c1/squash/buynlarge/newrepo/latest", auth=credentials) - assert response.status_code == 403 - - -@pytest.mark.parametrize("use_estimates", [False, True,]) -def test_multilayer_squashed_images( - use_estimates, - pusher, - multi_layer_images, - liveserver_session, - liveserver, - registry_server_executor, - app_reloader, -): - """ Test: Pulling of multilayer, complex squashed images. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, - "devtable", - "newrepo", - "latest", - multi_layer_images, - credentials=credentials, - ) - - if use_estimates: - # Clear the uncompressed size stored for the images, to ensure that we estimate instead. - for image in multi_layer_images: - registry_server_executor.on(liveserver).clear_uncompressed_size(image.id) - - # Pull the squashed version. 
- response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 200 - - tar = tarfile.open(fileobj=BytesIO(response.content)) - - # Verify the squashed image. - expected_image_id = next( - (name for name in tar.getnames() if not "/" in name and name != "repositories") - ) - expected_names = [ - "repositories", - expected_image_id, - "%s/json" % expected_image_id, - "%s/VERSION" % expected_image_id, - "%s/layer.tar" % expected_image_id, - ] - - assert tar.getnames() == expected_names - - # Verify the JSON image data. - json_data = tar.extractfile(tar.getmember("%s/json" % expected_image_id)).read() - - # Ensure the JSON loads and parses. - result = json.loads(json_data) - assert result["id"] == expected_image_id - assert result["config"]["internal_id"] == "layer5" - - # Ensure that squashed layer tar can be opened. - tar = tarfile.open(fileobj=tar.extractfile(tar.getmember("%s/layer.tar" % expected_image_id))) - assert set(tar.getnames()) == {"contents", "file1", "file2", "file3", "file4"} - - # Check the contents of various files. - assert tar.extractfile("contents").read() == b"layer 5 contents" - assert tar.extractfile("file1").read() == b"from-layer-3" - assert tar.extractfile("file2").read() == b"from-layer-2" - assert tar.extractfile("file3").read() == b"from-layer-4" - assert tar.extractfile("file4").read() == b"from-layer-5" - - -@pytest.mark.parametrize("use_estimates", [False, True,]) -@pytest.mark.parametrize("is_readonly", [False, True,]) -def test_squashed_images( - use_estimates, - pusher, - sized_images, - liveserver_session, - is_readonly, - liveserver, - registry_server_executor, - app_reloader, -): - """ Test: Pulling of squashed images. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, "devtable", "newrepo", "latest", sized_images, credentials=credentials - ) - - if use_estimates: - # Clear the uncompressed size stored for the images, to ensure that we estimate instead. - for image in sized_images: - registry_server_executor.on(liveserver).clear_uncompressed_size(image.id) - - # Pull the squashed version. - with ConfigChange( - "REGISTRY_STATE", - "readonly" if is_readonly else "normal", - registry_server_executor.on(liveserver), - liveserver, - ): - response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 200 - - tar = tarfile.open(fileobj=BytesIO(response.content)) - - # Verify the squashed image. - expected_image_id = next( - (name for name in tar.getnames() if not "/" in name and name != "repositories") - ) - expected_names = [ - "repositories", - expected_image_id, - "%s/json" % expected_image_id, - "%s/VERSION" % expected_image_id, - "%s/layer.tar" % expected_image_id, - ] - - assert tar.getnames() == expected_names - - # Verify the JSON image data. - json_data = tar.extractfile(tar.getmember("%s/json" % expected_image_id)).read() - - # Ensure the JSON loads and parses. - result = json.loads(json_data) - assert result["id"] == expected_image_id - assert result["config"]["foo"] == "childbar" - - # Ensure that squashed layer tar can be opened. - tar = tarfile.open( - fileobj=tar.extractfile(tar.getmember("%s/layer.tar" % expected_image_id)) - ) - assert tar.getnames() == ["contents"] - - # Check the contents. 
- assert tar.extractfile("contents").read() == b"some contents" - - -EXPECTED_ACI_MANIFEST = { - "acKind": "ImageManifest", - "app": { - "environment": [], - "mountPoints": [], - "group": "root", - "user": "root", - "workingDirectory": "/", - "exec": ["/bin/sh", "-c", '""hello""'], - "isolators": [], - "eventHandlers": [], - "ports": [], - "annotations": [ - {"name": "created", "value": "2018-04-03T18:37:09.284840891Z"}, - {"name": "homepage", "value": "http://localhost:5000/devtable/newrepo:latest"}, - {"name": "quay.io/derived-image", "value": "DERIVED_IMAGE_ID"}, - ], - }, - "labels": [ - {"name": "version", "value": "latest"}, - {"name": "arch", "value": "amd64"}, - {"name": "os", "value": "linux"}, - ], - "acVersion": "0.6.1", - "name": "localhost/devtable/newrepo", -} - - -@pytest.mark.parametrize("is_readonly", [False, True,]) -def test_aci_conversion( - pusher, - sized_images, - liveserver_session, - is_readonly, - liveserver, - registry_server_executor, - app_reloader, -): - """ Test: Pulling of ACI converted images. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, "devtable", "newrepo", "latest", sized_images, credentials=credentials - ) - - # Pull the ACI version. - with ConfigChange( - "REGISTRY_STATE", - "readonly" if is_readonly else "normal", - registry_server_executor.on(liveserver), - liveserver, - ): - response = liveserver_session.get( - "/c1/aci/server_name/devtable/newrepo/latest/aci/linux/amd64", auth=credentials - ) - assert response.status_code == 200 - tar = tarfile.open(fileobj=BytesIO(response.content)) - assert set(tar.getnames()) == {"manifest", "rootfs", "rootfs/contents"} - - assert tar.extractfile("rootfs/contents").read() == b"some contents" - loaded = json.loads(tar.extractfile("manifest").read()) - for annotation in loaded["app"]["annotations"]: - if annotation["name"] == "quay.io/derived-image": - annotation["value"] = "DERIVED_IMAGE_ID" - - assert loaded == EXPECTED_ACI_MANIFEST - - if not is_readonly: - # Wait for the ACI signature to be written. - time.sleep(1) - - # Pull the ACI signature. - response = liveserver_session.get( - "/c1/aci/server_name/devtable/newrepo/latest/aci.asc/linux/amd64", auth=credentials - ) - assert response.status_code == 200 - - -@pytest.mark.parametrize("schema_version", [1, 2,]) -def test_aci_conversion_manifest_list( - v22_protocol, - sized_images, - different_images, - liveserver_session, - data_model, - liveserver, - registry_server_executor, - app_reloader, - schema_version, -): - """ Test: Pulling of ACI converted image from a manifest list. """ - credentials = ("devtable", "password") - options = ProtocolOptions() - - # Build the manifests that will go in the list. - blobs = {} - - signed = v22_protocol.build_schema1( - "devtable", "newrepo", "latest", sized_images, blobs, options, arch="amd64" - ) - first_manifest = signed.unsigned() - if schema_version == 2: - first_manifest = v22_protocol.build_schema2(sized_images, blobs, options) - - second_manifest = v22_protocol.build_schema2(different_images, blobs, options) - - # Create and push the manifest list. 
- builder = DockerSchema2ManifestListBuilder() - builder.add_manifest(first_manifest, "amd64", "linux") - builder.add_manifest(second_manifest, "arm", "linux") - manifestlist = builder.build() - - v22_protocol.push_list( - liveserver_session, - "devtable", - "newrepo", - "latest", - manifestlist, - [first_manifest, second_manifest], - blobs, - credentials=credentials, - options=options, - ) - - # Pull the ACI version. - response = liveserver_session.get( - "/c1/aci/server_name/devtable/newrepo/latest/aci/linux/amd64", auth=credentials - ) - assert response.status_code == 200 - tar = tarfile.open(fileobj=BytesIO(response.content)) - assert set(tar.getnames()) == {"manifest", "rootfs", "rootfs/contents"} - - assert tar.extractfile("rootfs/contents").read() == b"some contents" - - loaded = json.loads(tar.extractfile("manifest").read()) - for annotation in loaded["app"]["annotations"]: - if annotation["name"] == "quay.io/derived-image": - annotation["value"] = "DERIVED_IMAGE_ID" - - assert loaded == EXPECTED_ACI_MANIFEST - - # Wait for the ACI signature to be written. - time.sleep(1) - - # Pull the ACI signature. - response = liveserver_session.get( - "/c1/aci/server_name/devtable/newrepo/latest/aci.asc/linux/amd64", auth=credentials - ) - assert response.status_code == 200 - - @pytest.mark.parametrize( "push_user, push_namespace, push_repo, mount_repo_name, expected_failure", [ @@ -2323,10 +1998,8 @@ def test_push_pull_same_blobs(pusher, puller, liveserver_session, app_reloader): ) -def test_push_tag_existing_image( - v1_protocol, puller, basic_images, liveserver_session, app_reloader -): - """ Test: Push a new tag on an existing manifest/image. """ +def test_push_tag_existing_image(v1_protocol, basic_images, liveserver_session, app_reloader): + """ Test: Push a new tag on an existing image. """ credentials = ("devtable", "password") # Push a new repository. @@ -2334,18 +2007,24 @@ def test_push_tag_existing_image( liveserver_session, "devtable", "newrepo", "latest", basic_images, credentials=credentials ) - # Push the same image/manifest to another tag in the repository. + # Pull the repository to verify. + pulled = v1_protocol.pull( + liveserver_session, "devtable", "newrepo", "latest", basic_images, credentials=credentials, + ) + assert pulled.image_ids + + # Push the same image to another tag in the repository. v1_protocol.tag( liveserver_session, "devtable", "newrepo", "anothertag", - basic_images[-1], + pulled.image_ids["latest"], credentials=credentials, ) # Pull the repository to verify. - puller.pull( + v1_protocol.pull( liveserver_session, "devtable", "newrepo", @@ -2655,131 +2334,6 @@ def test_push_pull_manifest_list_duplicate_manifest( ) -def test_squashed_images_empty_layer( - pusher, - images_with_empty_layer, - liveserver_session, - liveserver, - registry_server_executor, - app_reloader, -): - """ Test: Pulling of squashed images for a manifest with empty layers. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, - "devtable", - "newrepo", - "latest", - images_with_empty_layer, - credentials=credentials, - ) - - # Pull the squashed version. - response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 200 - - tar = tarfile.open(fileobj=BytesIO(response.content)) - - # Verify the squashed image. 
- expected_image_id = next( - (name for name in tar.getnames() if not "/" in name and name != "repositories") - ) - expected_names = [ - "repositories", - expected_image_id, - "%s/json" % expected_image_id, - "%s/VERSION" % expected_image_id, - "%s/layer.tar" % expected_image_id, - ] - - assert tar.getnames() == expected_names - - -def test_squashed_image_unsupported( - v22_protocol, basic_images, liveserver_session, liveserver, app_reloader, data_model -): - """ Test: Attempting to pull a squashed image for a manifest list without an amd64+linux entry. - """ - credentials = ("devtable", "password") - options = ProtocolOptions() - - # Build the manifest that will go in the list. - blobs = {} - manifest = v22_protocol.build_schema2(basic_images, blobs, options) - - # Create and push the manifest list. - builder = DockerSchema2ManifestListBuilder() - builder.add_manifest(manifest, "foobar", "someos") - manifestlist = builder.build() - - v22_protocol.push_list( - liveserver_session, - "devtable", - "newrepo", - "latest", - manifestlist, - [manifest], - blobs, - credentials=credentials, - options=options, - ) - - # Attempt to pull the squashed version. - response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 404 - - -def test_squashed_image_manifest_list( - v22_protocol, basic_images, liveserver_session, liveserver, app_reloader, data_model -): - """ Test: Pull a squashed image for a manifest list with an amd64+linux entry. - """ - credentials = ("devtable", "password") - options = ProtocolOptions() - - # Build the manifest that will go in the list. - blobs = {} - manifest = v22_protocol.build_schema2(basic_images, blobs, options) - - # Create and push the manifest list. - builder = DockerSchema2ManifestListBuilder() - builder.add_manifest(manifest, "amd64", "linux") - manifestlist = builder.build() - - v22_protocol.push_list( - liveserver_session, - "devtable", - "newrepo", - "latest", - manifestlist, - [manifest], - blobs, - credentials=credentials, - options=options, - ) - - # Pull the squashed version. - response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 200 - - # Verify the squashed image. - tar = tarfile.open(fileobj=BytesIO(response.content)) - expected_image_id = next( - (name for name in tar.getnames() if not "/" in name and name != "repositories") - ) - expected_names = [ - "repositories", - expected_image_id, - "%s/json" % expected_image_id, - "%s/VERSION" % expected_image_id, - "%s/layer.tar" % expected_image_id, - ] - - assert tar.getnames() == expected_names - - def test_verify_schema2( v22_protocol, basic_images, liveserver_session, liveserver, app_reloader, data_model ): diff --git a/test/test_api_usage.py b/test/test_api_usage.py index c18b13c5a..8c102f6b7 100644 --- a/test/test_api_usage.py +++ b/test/test_api_usage.py @@ -2444,7 +2444,6 @@ class TestDeleteRepository(ApiTestCase): # Make sure the repository has some images and tags. repo_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, "complex") - self.assertTrue(len(list(registry_model.get_legacy_images(repo_ref))) > 0) self.assertTrue(len(list(registry_model.list_all_active_repository_tags(repo_ref))) > 0) # Add some data for the repository, in addition to is already existing images and tags. 
@@ -2525,11 +2524,11 @@ class TestGetRepository(ApiTestCase): self.login(ADMIN_ACCESS_USER) # base + repo + is_starred + tags - with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 4 + 1): + with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 4): self.getJsonResponse(Repository, params=dict(repository=ADMIN_ACCESS_USER + "/simple")) # base + repo + is_starred + tags - with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 4 + 1): + with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 4): json = self.getJsonResponse( Repository, params=dict(repository=ADMIN_ACCESS_USER + "/gargantuan") ) @@ -3326,8 +3325,7 @@ class TestListAndDeleteTag(ApiTestCase): params=dict(repository=ADMIN_ACCESS_USER + "/complex", tag="sometag"), ) - sometag_images = json["images"] - self.assertEqual(sometag_images, staging_images) + assert json["images"] # Move the tag. self.putResponse( @@ -3344,8 +3342,7 @@ class TestListAndDeleteTag(ApiTestCase): ) sometag_new_images = json["images"] - self.assertEqual(1, len(sometag_new_images)) - self.assertEqual(staging_images[-1], sometag_new_images[0]) + assert sometag_new_images def test_deletesubtag(self): self.login(ADMIN_ACCESS_USER) @@ -3384,7 +3381,7 @@ class TestListAndDeleteTag(ApiTestCase): self.login(ADMIN_ACCESS_USER) repo_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, "simple") - latest_tag = registry_model.get_repo_tag(repo_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repo_ref, "latest") # Create 8 tags in the simple repo. remaining_tags = {"latest", "prod"} @@ -3392,7 +3389,7 @@ class TestListAndDeleteTag(ApiTestCase): tag_name = "tag" + str(i) remaining_tags.add(tag_name) assert registry_model.retarget_tag( - repo_ref, tag_name, latest_tag.legacy_image, storage, docker_v2_signing_key + repo_ref, tag_name, latest_tag.manifest, storage, docker_v2_signing_key ) # Make sure we can iterate over all of them. 
diff --git a/test/test_secscan.py b/test/test_secscan.py index 9bf182c1c..59d7f497d 100644 --- a/test/test_secscan.py +++ b/test/test_secscan.py @@ -2,44 +2,26 @@ import json import time import unittest -from app import app, storage, notification_queue, url_scheme_and_hostname +from app import app, storage, url_scheme_and_hostname from data import model from data.registry_model import registry_model -from data.database import Image, IMAGE_NOT_SCANNED_ENGINE_VERSION -from endpoints.v2 import v2_bp +from data.database import Image, ManifestLegacyImage from initdb import setup_database_for_testing, finished_database_for_testing -from notifications.notificationevent import VulnerabilityFoundEvent from util.secscan.secscan_util import get_blob_download_uri_getter -from util.morecollections import AttrDict from util.secscan.api import SecurityScannerAPI, APIRequestFailure -from util.secscan.analyzer import LayerAnalyzer from util.secscan.fake import fake_security_scanner -from util.secscan.notifier import SecurityNotificationHandler, ProcessNotificationPageResult from util.security.instancekeys import InstanceKeys -from workers.security_notification_worker import SecurityNotificationWorker ADMIN_ACCESS_USER = "devtable" SIMPLE_REPO = "simple" -COMPLEX_REPO = "complex" - - -def process_notification_data(legacy_api, notification_data): - handler = SecurityNotificationHandler(legacy_api, 100) - result = handler.process_notification_page_data(notification_data) - handler.send_notifications() - return result == ProcessNotificationPageResult.FINISHED_PROCESSING def _get_legacy_image(namespace, repo, tag, include_storage=True): repo_ref = registry_model.lookup_repository(namespace, repo) - repo_tag = registry_model.get_repo_tag(repo_ref, tag, include_legacy_image=True) - return Image.get(id=repo_tag.legacy_image._db_id) - - -def _delete_tag(namespace, repo, tag): - repo_ref = registry_model.lookup_repository(namespace, repo) - registry_model.delete_tag(repo_ref, tag) + repo_tag = registry_model.get_repo_tag(repo_ref, tag) + manifest = registry_model.get_manifest_for_tag(repo_tag) + return ManifestLegacyImage.get(manifest_id=manifest._db_id).image class TestSecurityScanner(unittest.TestCase): @@ -93,785 +75,24 @@ class TestSecurityScanner(unittest.TestCase): """ Test for basic retrieval of layers from the security scanner. """ - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) + + repo_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) + repo_tag = registry_model.get_repo_tag(repo_ref, "latest") + manifest = registry_model.get_manifest_for_tag(repo_tag) + registry_model.populate_legacy_images_for_testing(manifest, storage) with fake_security_scanner() as security_scanner: # Ensure the layer doesn't exist yet. - self.assertFalse(security_scanner.has_layer(security_scanner.layer_id(layer))) - self.assertIsNone(self.api.get_layer_data(layer)) + self.assertFalse(security_scanner.has_layer(security_scanner.layer_id(manifest))) + self.assertIsNone(self.api.get_layer_data(manifest)) # Add the layer. - security_scanner.add_layer(security_scanner.layer_id(layer)) + security_scanner.add_layer(security_scanner.layer_id(manifest)) # Retrieve the results. 
- result = self.api.get_layer_data(layer, include_vulnerabilities=True) + result = self.api.get_layer_data(manifest, include_vulnerabilities=True) self.assertIsNotNone(result) - self.assertEqual(result["Layer"]["Name"], security_scanner.layer_id(layer)) - - def test_analyze_layer_nodirectdownload_success(self): - """ - Tests analyzing a layer when direct download is disabled. - """ - - # Disable direct download in fake storage. - storage.put_content(["local_us"], "supports_direct_download", b"false") - - try: - app.register_blueprint(v2_bp, url_prefix="/v2") - except: - # Already registered. - pass - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - # Ensure that the download is a registry+JWT download. - uri, auth_header = self.api._get_image_url_and_auth(layer) - self.assertIsNotNone(uri) - self.assertIsNotNone(auth_header) - - # Ensure the download doesn't work without the header. - rv = self.app.head(uri) - self.assertEqual(rv.status_code, 401) - - # Ensure the download works with the header. Note we use a HEAD here, as GET causes DB - # access which messes with the test runner's rollback. - rv = self.app.head(uri, headers=[("authorization", auth_header)]) - self.assertEqual(rv.status_code, 200) - - # Ensure the code works when called via analyze. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - def test_analyze_layer_success(self): - """ - Tests that analyzing a layer successfully marks it as analyzed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - def test_analyze_layer_failure(self): - """ - Tests that failing to analyze a layer (because it 422s) marks it as analyzed but failed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - security_scanner.set_fail_layer_id(security_scanner.layer_id(layer)) - - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def test_analyze_layer_internal_error(self): - """ - Tests that failing to analyze a layer (because it 500s) marks it as not analyzed. 
- """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - security_scanner.set_internal_error_layer_id(security_scanner.layer_id(layer)) - - analyzer = LayerAnalyzer(app.config, self.api) - with self.assertRaises(APIRequestFailure): - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, -1) - - def test_analyze_layer_error(self): - """ - Tests that failing to analyze a layer (because it 400s) marks it as analyzed but failed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Make is so trying to analyze the parent will fail with an error. - security_scanner.set_error_layer_id(security_scanner.layer_id(layer.parent)) - - # Try to the layer and its parents, but with one request causing an error. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - # Make sure it is marked as analyzed, but in a failed state. - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def test_analyze_layer_unexpected_status(self): - """ - Tests that a response from a scanner with an unexpected status code fails correctly. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Make is so trying to analyze the parent will fail with an error. - security_scanner.set_unexpected_status_layer_id(security_scanner.layer_id(layer.parent)) - - # Try to the layer and its parents, but with one request causing an error. - analyzer = LayerAnalyzer(app.config, self.api) - with self.assertRaises(APIRequestFailure): - analyzer.analyze_recursively(layer) - - # Make sure it isn't analyzed. - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, -1) - - def test_analyze_layer_missing_parent_handled(self): - """ - Tests that a missing parent causes an automatic reanalysis, which succeeds. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Analyze the layer and its parents. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - # Make sure it was analyzed. - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Mark the layer as not yet scanned. - layer.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION - layer.security_indexed = False - layer.save() - - # Remove the layer's parent entirely from the security scanner. - security_scanner.remove_layer(security_scanner.layer_id(layer.parent)) - - # Analyze again, which should properly re-analyze the missing parent and this layer. 
- analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - def test_analyze_layer_invalid_parent(self): - """ - Tests that trying to reanalyze a parent that is invalid causes the layer to be marked as - analyzed, but failed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Analyze the layer and its parents. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - # Make sure it was analyzed. - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Mark the layer as not yet scanned. - layer.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION - layer.security_indexed = False - layer.save() - - # Remove the layer's parent entirely from the security scanner. - security_scanner.remove_layer(security_scanner.layer_id(layer.parent)) - - # Make is so trying to analyze the parent will fail. - security_scanner.set_error_layer_id(security_scanner.layer_id(layer.parent)) - - # Try to analyze again, which should try to reindex the parent and fail. - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def test_analyze_layer_unsupported_parent(self): - """ - Tests that attempting to analyze a layer whose parent is unanalyzable, results in the layer - being marked as analyzed, but failed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Make is so trying to analyze the parent will fail. - security_scanner.set_fail_layer_id(security_scanner.layer_id(layer.parent)) - - # Attempt to the layer and its parents. This should mark the layer itself as unanalyzable. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def test_analyze_layer_missing_storage(self): - """ - Tests trying to analyze a layer with missing storage. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - # Delete the storage for the layer. 
- path = model.storage.get_layer_path(layer.storage) - locations = app.config["DISTRIBUTED_STORAGE_PREFERENCE"] - storage.remove(locations, path) - storage.remove(locations, "all_files_exist") - - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def assert_analyze_layer_notify( - self, security_indexed_engine, security_indexed, expect_notification - ): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - # Ensure there are no existing events. - self.assertIsNone(notification_queue.get()) - - # Add a repo event for the layer. - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Update the layer's state before analyzing. - layer.security_indexed_engine = security_indexed_engine - layer.security_indexed = security_indexed - layer.save() - - with fake_security_scanner() as security_scanner: - security_scanner.set_vulns( - security_scanner.layer_id(layer), - [ - { - "Name": "CVE-2014-9471", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Low", - "FixedBy": "9.23-5", - }, - { - "Name": "CVE-2016-7530", - "Namespace": "debian:8", - "Description": "Some other service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2016-7530", - "Severity": "Unknown", - "FixedBy": "19.343-2", - }, - ], - ) - - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Ensure an event was written for the tag (if necessary). - time.sleep(1) - queue_item = notification_queue.get() - - if expect_notification: - self.assertIsNotNone(queue_item) - - body = json.loads(queue_item.body) - self.assertEqual(set(["latest", "prod"]), set(body["event_data"]["tags"])) - self.assertEqual("CVE-2014-9471", body["event_data"]["vulnerability"]["id"]) - self.assertEqual("Low", body["event_data"]["vulnerability"]["priority"]) - self.assertTrue(body["event_data"]["vulnerability"]["has_fix"]) - - self.assertEqual("CVE-2014-9471", body["event_data"]["vulnerabilities"][0]["id"]) - self.assertEqual(2, len(body["event_data"]["vulnerabilities"])) - - # Ensure we get the correct event message out as well. - event = VulnerabilityFoundEvent() - msg = "1 Low and 1 more vulnerabilities were detected in repository devtable/simple in 2 tags" - self.assertEqual(msg, event.get_summary(body["event_data"], {})) - self.assertEqual("info", event.get_level(body["event_data"], {})) - else: - self.assertIsNone(queue_item) - - # Ensure its security indexed engine was updated. 
- updated_layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertEquals(updated_layer.id, layer.id) - self.assertTrue(updated_layer.security_indexed_engine > 0) - - def test_analyze_layer_success_events(self): - # Not previously indexed at all => Notification - self.assert_analyze_layer_notify(IMAGE_NOT_SCANNED_ENGINE_VERSION, False, True) - - def test_analyze_layer_success_no_notification(self): - # Previously successfully indexed => No notification - self.assert_analyze_layer_notify(0, True, False) - - def test_analyze_layer_failed_then_success_notification(self): - # Previously failed to index => Notification - self.assert_analyze_layer_notify(0, False, True) - - def test_notification_new_layers_not_vulnerable(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer_id = "%s.%s" % (layer.docker_image_id, layer.storage.uuid) - - # Add a repo event for the layer. - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification([layer_id], [], {}, {}) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - def test_notification_delete(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer_id = "%s.%s" % (layer.docker_image_id, layer.storage.uuid) - - # Add a repo event for the layer. - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification([layer_id], None, {}, None) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - def test_notification_new_layers(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer_id = "%s.%s" % (layer.docker_image_id, layer.storage.uuid) - - # Add a repo event for the layer. 
- repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Low", - "FixedIn": {"Version": "9.23-5"}, - } - security_scanner.set_vulns(layer_id, [vuln_info]) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification( - [], [layer_id], vuln_info, vuln_info - ) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure an event was written for the tag. - time.sleep(1) - queue_item = notification_queue.get() - self.assertIsNotNone(queue_item) - - item_body = json.loads(queue_item.body) - self.assertEqual(sorted(["prod", "latest"]), sorted(item_body["event_data"]["tags"])) - self.assertEqual("CVE-TEST", item_body["event_data"]["vulnerability"]["id"]) - self.assertEqual("Low", item_body["event_data"]["vulnerability"]["priority"]) - self.assertTrue(item_body["event_data"]["vulnerability"]["has_fix"]) - - def test_notification_no_new_layers(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - - # Add a repo event for the layer. - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification([], [], {}, {}) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - def notification_tuple(self, notification): - # TODO: Replace this with a method once we refactor the notification stuff into its - # own module. - return AttrDict( - { - "event_config_dict": json.loads(notification.event_config_json), - "method_config_dict": json.loads(notification.config_json), - } - ) - - def test_notification_no_new_layers_increased_severity(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer_id = "%s.%s" % (layer.docker_image_id, layer.storage.uuid) - - # Add a repo event for the layer. 
- repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - notification = model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - old_vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Low", - } - - new_vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Critical", - "FixedIn": {"Version": "9.23-5"}, - } - - security_scanner.set_vulns(layer_id, [new_vuln_info]) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification( - [layer_id], [layer_id], old_vuln_info, new_vuln_info - ) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure an event was written for the tag. - time.sleep(1) - queue_item = notification_queue.get() - self.assertIsNotNone(queue_item) - - item_body = json.loads(queue_item.body) - self.assertEqual(sorted(["prod", "latest"]), sorted(item_body["event_data"]["tags"])) - self.assertEqual("CVE-TEST", item_body["event_data"]["vulnerability"]["id"]) - self.assertEqual("Critical", item_body["event_data"]["vulnerability"]["priority"]) - self.assertTrue(item_body["event_data"]["vulnerability"]["has_fix"]) - - # Verify that an event would be raised. - event_data = item_body["event_data"] - notification = self.notification_tuple(notification) - self.assertTrue(VulnerabilityFoundEvent().should_perform(event_data, notification)) - - # Create another notification with a matching level and verify it will be raised. - notification = model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 1} - ) - - notification = self.notification_tuple(notification) - self.assertTrue(VulnerabilityFoundEvent().should_perform(event_data, notification)) - - # Create another notification with a higher level and verify it won't be raised. - notification = model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 0} - ) - notification = self.notification_tuple(notification) - self.assertFalse(VulnerabilityFoundEvent().should_perform(event_data, notification)) - - def test_select_images_to_scan(self): - # Set all images to have a security index of a version to that of the config. - expected_version = app.config["SECURITY_SCANNER_ENGINE_VERSION_TARGET"] - Image.update(security_indexed_engine=expected_version).execute() - - # Ensure no images are available for scanning. - self.assertIsNone(model.image.get_min_id_for_sec_scan(expected_version)) - self.assertTrue(len(model.image.get_images_eligible_for_scan(expected_version)) == 0) - - # Check for a higher version. 
- self.assertIsNotNone(model.image.get_min_id_for_sec_scan(expected_version + 1)) - self.assertTrue(len(model.image.get_images_eligible_for_scan(expected_version + 1)) > 0) - - def test_notification_worker(self): - layer1 = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer2 = _get_legacy_image(ADMIN_ACCESS_USER, COMPLEX_REPO, "prod", include_storage=True) - - # Add a repo events for the layers. - simple_repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - complex_repo = model.repository.get_repository(ADMIN_ACCESS_USER, COMPLEX_REPO) - - model.notification.create_repo_notification( - simple_repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - model.notification.create_repo_notification( - complex_repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - with fake_security_scanner() as security_scanner: - # Test with an unknown notification. - worker = SecurityNotificationWorker(None) - self.assertFalse(worker.perform_notification_work({"Name": "unknownnotification"})) - - # Add some analyzed layers. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer1) - analyzer.analyze_recursively(layer2) - - # Add a notification with pages of data. - new_vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Critical", - "FixedIn": {"Version": "9.23-5"}, - } - - security_scanner.set_vulns(security_scanner.layer_id(layer1), [new_vuln_info]) - security_scanner.set_vulns(security_scanner.layer_id(layer2), [new_vuln_info]) - - layer_ids = [security_scanner.layer_id(layer1), security_scanner.layer_id(layer2)] - notification_data = security_scanner.add_notification( - [], layer_ids, None, new_vuln_info - ) - - # Test with a known notification with pages. - data = { - "Name": notification_data["Name"], - } - - worker = SecurityNotificationWorker(None) - self.assertTrue(worker.perform_notification_work(data, layer_limit=2)) - - # Make sure all pages were processed by ensuring we have two notifications. - time.sleep(1) - self.assertIsNotNone(notification_queue.get()) - self.assertIsNotNone(notification_queue.get()) - - def test_notification_worker_offset_pages_not_indexed(self): - # Try without indexes. - self.assert_notification_worker_offset_pages(indexed=False) - - def test_notification_worker_offset_pages_indexed(self): - # Try with indexes. - self.assert_notification_worker_offset_pages(indexed=True) - - def assert_notification_worker_offset_pages(self, indexed=False): - layer1 = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer2 = _get_legacy_image(ADMIN_ACCESS_USER, COMPLEX_REPO, "prod", include_storage=True) - - # Add a repo events for the layers. - simple_repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - complex_repo = model.repository.get_repository(ADMIN_ACCESS_USER, COMPLEX_REPO) - - model.notification.create_repo_notification( - simple_repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - model.notification.create_repo_notification( - complex_repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. 
- self.assertIsNone(notification_queue.get()) - - with fake_security_scanner() as security_scanner: - # Test with an unknown notification. - worker = SecurityNotificationWorker(None) - self.assertFalse(worker.perform_notification_work({"Name": "unknownnotification"})) - - # Add some analyzed layers. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer1) - analyzer.analyze_recursively(layer2) - - # Add a notification with pages of data. - new_vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Critical", - "FixedIn": {"Version": "9.23-5"}, - } - - security_scanner.set_vulns(security_scanner.layer_id(layer1), [new_vuln_info]) - security_scanner.set_vulns(security_scanner.layer_id(layer2), [new_vuln_info]) - - # Define offsetting sets of layer IDs, to test cross-pagination support. In this test, we - # will only serve 2 layer IDs per page: the first page will serve both of the 'New' layer IDs, - # but since the first 2 'Old' layer IDs are "earlier" than the shared ID of - # `devtable/simple:latest`, they won't get served in the 'New' list until the *second* page. - # The notification handling system should correctly not notify for this layer, even though it - # is marked 'New' on page 1 and marked 'Old' on page 2. Clair will served these - # IDs sorted in the same manner. - idx_old_layer_ids = [ - {"LayerName": "old1", "Index": 1}, - {"LayerName": "old2", "Index": 2}, - {"LayerName": security_scanner.layer_id(layer1), "Index": 3}, - ] - - idx_new_layer_ids = [ - {"LayerName": security_scanner.layer_id(layer1), "Index": 3}, - {"LayerName": security_scanner.layer_id(layer2), "Index": 4}, - ] - - old_layer_ids = [t["LayerName"] for t in idx_old_layer_ids] - new_layer_ids = [t["LayerName"] for t in idx_new_layer_ids] - - if not indexed: - idx_old_layer_ids = None - idx_new_layer_ids = None - - notification_data = security_scanner.add_notification( - old_layer_ids, - new_layer_ids, - None, - new_vuln_info, - max_per_page=2, - indexed_old_layer_ids=idx_old_layer_ids, - indexed_new_layer_ids=idx_new_layer_ids, - ) - - # Test with a known notification with pages. - data = { - "Name": notification_data["Name"], - } - - worker = SecurityNotificationWorker(None) - self.assertTrue(worker.perform_notification_work(data, layer_limit=2)) - - # Make sure all pages were processed by ensuring we have only one notification. If the second - # page was not processed, then the `Old` entry for layer1 will not be found, and we'd get two - # notifications. - time.sleep(1) - self.assertIsNotNone(notification_queue.get()) - self.assertIsNone(notification_queue.get()) - - def test_layer_gc(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - - # Delete the prod tag so that only the `latest` tag remains. - _delete_tag(ADMIN_ACCESS_USER, SIMPLE_REPO, "prod") - - with fake_security_scanner() as security_scanner: - # Analyze the layer. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - self.assertTrue(security_scanner.has_layer(security_scanner.layer_id(layer))) - - namespace_user = model.user.get_user(ADMIN_ACCESS_USER) - model.user.change_user_tag_expiration(namespace_user, 0) - - # Delete the tag in the repository and GC. 
- _delete_tag(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - time.sleep(1) - - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.gc.garbage_collect_repo(repo) - - # Ensure that the security scanner no longer has the image. - self.assertFalse(security_scanner.has_layer(security_scanner.layer_id(layer))) + self.assertEquals(result["Layer"]["Name"], security_scanner.layer_id(manifest)) if __name__ == "__main__": diff --git a/test/testconfig.py b/test/testconfig.py index 301f7f71c..a9a1aac71 100644 --- a/test/testconfig.py +++ b/test/testconfig.py @@ -66,7 +66,6 @@ class TestConfig(DefaultConfig): SECURITY_SCANNER_ENGINE_VERSION_TARGET = 1 SECURITY_SCANNER_API_TIMEOUT_SECONDS = 1 SECURITY_SCANNER_V4_ENDPOINT = "http://fakesecurityscanner/" - SECURITY_SCANNER_V4_NAMESPACE_WHITELIST = ["devtable"] FEATURE_SIGNING = True diff --git a/util/config/configutil.py b/util/config/configutil.py index e9aa44813..0e2e50a0f 100644 --- a/util/config/configutil.py +++ b/util/config/configutil.py @@ -48,15 +48,6 @@ def add_enterprise_config_defaults(config_obj, current_secret_key): config_obj["REPO_MIRROR_TLS_VERIFY"] = config_obj.get("REPO_MIRROR_TLS_VERIFY", True) config_obj["REPO_MIRROR_SERVER_HOSTNAME"] = config_obj.get("REPO_MIRROR_SERVER_HOSTNAME", None) - # Default the signer config. - config_obj["GPG2_PRIVATE_KEY_FILENAME"] = config_obj.get( - "GPG2_PRIVATE_KEY_FILENAME", "signing-private.gpg" - ) - config_obj["GPG2_PUBLIC_KEY_FILENAME"] = config_obj.get( - "GPG2_PUBLIC_KEY_FILENAME", "signing-public.gpg" - ) - config_obj["SIGNING_ENGINE"] = config_obj.get("SIGNING_ENGINE", "gpg2") - # Default security scanner config. config_obj["FEATURE_SECURITY_NOTIFICATIONS"] = config_obj.get( "FEATURE_SECURITY_NOTIFICATIONS", True diff --git a/util/config/schema.py b/util/config/schema.py index 4e543da2f..cf35f0c12 100644 --- a/util/config/schema.py +++ b/util/config/schema.py @@ -18,6 +18,7 @@ INTERNAL_ONLY_PROPERTIES = { "FEATURE_REPOSITORY_ACTION_COUNTER", "APP_REGISTRY_PACKAGE_LIST_CACHE_WHITELIST", "APP_REGISTRY_SHOW_PACKAGE_CACHE_WHITELIST", + "FEATURE_MANIFEST_SIZE_BACKFILL", "TESTING", "SEND_FILE_MAX_AGE_DEFAULT", "DISABLED_FOR_AUDIT_LOGS", @@ -29,7 +30,6 @@ INTERNAL_ONLY_PROPERTIES = { "REPLICATION_QUEUE_NAME", "DOCKERFILE_BUILD_QUEUE_NAME", "CHUNK_CLEANUP_QUEUE_NAME", - "SECSCAN_NOTIFICATION_QUEUE_NAME", "SECURITY_SCANNER_ISSUER_NAME", "NOTIFICATION_QUEUE_NAME", "REPOSITORY_GC_QUEUE_NAME", @@ -57,7 +57,6 @@ INTERNAL_ONLY_PROPERTIES = { "JWTPROXY_AUDIENCE", "JWTPROXY_SIGNER", "SECURITY_SCANNER_INDEXING_MIN_ID", - "SECURITY_SCANNER_V4_NAMESPACE_WHITELIST", "SECURITY_SCANNER_V4_REINDEX_THRESHOLD", "STATIC_SITE_BUCKET", "LABEL_KEY_RESERVED_PREFIXES", diff --git a/util/config/validator.py b/util/config/validator.py index 69a0f6339..6e7c88c0d 100644 --- a/util/config/validator.py +++ b/util/config/validator.py @@ -12,7 +12,6 @@ from util.config.validators.validate_ldap import LDAPValidator from util.config.validators.validate_keystone import KeystoneValidator from util.config.validators.validate_jwt import JWTAuthValidator from util.config.validators.validate_secscan import SecurityScannerValidator -from util.config.validators.validate_signer import SignerValidator from util.config.validators.validate_ssl import SSLValidator, SSL_FILENAMES from util.config.validators.validate_google_login import GoogleLoginValidator from util.config.validators.validate_bitbucket_trigger import BitbucketTriggerValidator @@ -62,7 +61,6 @@ VALIDATORS = { LDAPValidator.name: LDAPValidator.validate, 
JWTAuthValidator.name: JWTAuthValidator.validate, KeystoneValidator.name: KeystoneValidator.validate, - SignerValidator.name: SignerValidator.validate, SecurityScannerValidator.name: SecurityScannerValidator.validate, OIDCLoginValidator.name: OIDCLoginValidator.validate, TimeMachineValidator.name: TimeMachineValidator.validate, diff --git a/util/config/validators/test/test_validate_signer.py b/util/config/validators/test/test_validate_signer.py deleted file mode 100644 index d7ac8bccb..000000000 --- a/util/config/validators/test/test_validate_signer.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from util.config.validator import ValidatorContext -from util.config.validators import ConfigValidationException -from util.config.validators.validate_signer import SignerValidator - -from test.fixtures import * - - -@pytest.mark.parametrize( - "unvalidated_config,expected", - [ - ({}, None), - ({"SIGNING_ENGINE": "foobar"}, ConfigValidationException), - ({"SIGNING_ENGINE": "gpg2"}, Exception), - ], -) -def test_validate_signer(unvalidated_config, expected, app): - validator = SignerValidator() - if expected is not None: - with pytest.raises(expected): - validator.validate(ValidatorContext(unvalidated_config)) - else: - validator.validate(ValidatorContext(unvalidated_config)) diff --git a/util/config/validators/validate_signer.py b/util/config/validators/validate_signer.py deleted file mode 100644 index 4b31fc58f..000000000 --- a/util/config/validators/validate_signer.py +++ /dev/null @@ -1,25 +0,0 @@ -from io import StringIO - -from util.config.validators import BaseValidator, ConfigValidationException -from util.security.signing import SIGNING_ENGINES - - -class SignerValidator(BaseValidator): - name = "signer" - - @classmethod - def validate(cls, validator_context): - """ - Validates the GPG public+private key pair used for signing converted ACIs. - """ - config = validator_context.config - config_provider = validator_context.config_provider - - if config.get("SIGNING_ENGINE") is None: - return - - if config["SIGNING_ENGINE"] not in SIGNING_ENGINES: - raise ConfigValidationException("Unknown signing engine: %s" % config["SIGNING_ENGINE"]) - - engine = SIGNING_ENGINES[config["SIGNING_ENGINE"]](config, config_provider) - engine.detached_sign(BytesIO(b"test string")) diff --git a/util/registry/aufs.py b/util/registry/aufs.py deleted file mode 100644 index c40158dde..000000000 --- a/util/registry/aufs.py +++ /dev/null @@ -1,38 +0,0 @@ -import os - -AUFS_METADATA = ".wh..wh." -AUFS_WHITEOUT = ".wh." -AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT) - - -def is_aufs_metadata(absolute): - """ - Returns whether the given absolute references an AUFS metadata file. - """ - filename = os.path.basename(absolute) - return filename.startswith(AUFS_METADATA) or absolute.startswith(AUFS_METADATA) - - -def get_deleted_filename(absolute): - """ - Returns the name of the deleted file referenced by the AUFS whiteout file at the given path or - None if the file path does not reference a whiteout file. - """ - filename = os.path.basename(absolute) - if not filename.startswith(AUFS_WHITEOUT): - return None - - return filename[AUFS_WHITEOUT_PREFIX_LENGTH:] - - -def get_deleted_prefix(absolute): - """ - Returns the path prefix of the deleted file referenced by the AUFS whiteout file at the given - path or None if the file path does not reference a whiteout file. 
- """ - deleted_filename = get_deleted_filename(absolute) - if deleted_filename is None: - return None - - dirname = os.path.dirname(absolute) - return os.path.join("/", dirname, deleted_filename)[1:] diff --git a/util/registry/gzipwrap.py b/util/registry/gzipwrap.py deleted file mode 100644 index 06c00ca88..000000000 --- a/util/registry/gzipwrap.py +++ /dev/null @@ -1,62 +0,0 @@ -from gzip import GzipFile - -# 256K buffer to Gzip -GZIP_BUFFER_SIZE = 1024 * 256 - - -class GzipWrap(object): - def __init__(self, input, filename=None, compresslevel=1): - self.input = iter(input) - self.buffer = b"" - self.zipper = GzipFile( - filename, mode="wb", fileobj=self, compresslevel=compresslevel, mtime=0 - ) - self.is_done = False - - def read(self, size=-1): - if size is None or size < 0: - raise Exception("Call to GzipWrap with unbound size will result in poor performance") - - # If the buffer already has enough bytes, then simply pop them off of - # the beginning and return them. - if len(self.buffer) >= size or self.is_done: - ret = self.buffer[0:size] - self.buffer = self.buffer[size:] - return ret - - # Otherwise, zip the input until we have enough bytes. - while True: - # Attempt to retrieve the next bytes to write. - is_done = False - - input_size = 0 - input_buffer = b"" - while input_size < GZIP_BUFFER_SIZE: - try: - s = next(self.input) - input_buffer += s - input_size = input_size + len(s) - except StopIteration: - is_done = True - break - - self.zipper.write(input_buffer) - - if is_done: - self.zipper.flush() - self.zipper.close() - self.is_done = True - - if len(self.buffer) >= size or is_done: - ret = self.buffer[0:size] - self.buffer = self.buffer[size:] - return ret - - def flush(self): - pass - - def write(self, data): - self.buffer += data - - def close(self): - self.input.close() diff --git a/util/registry/queuefile.py b/util/registry/queuefile.py deleted file mode 100644 index dac068701..000000000 --- a/util/registry/queuefile.py +++ /dev/null @@ -1,87 +0,0 @@ -from multiprocessing.queues import Empty, Queue - - -class QueueFile(object): - """ - Class which implements a file-like interface and reads QueueResult's from a blocking - multiprocessing queue. - """ - - def __init__(self, queue, name=None, timeout=None): - self._queue = queue - self._closed = False - self._done = False - self._buffer = b"" - self._total_size = 0 - self._name = name - self.raised_exception = False - self._exception_handlers = [] - self._timeout = timeout - - def add_exception_handler(self, handler): - self._exception_handlers.append(handler) - - def read(self, size=-1): - # If the queuefile was closed or we have finished, send back any remaining data. - if self._closed or self._done: - if size == -1: - buf = self._buffer - self._buffer = b"" - return buf - - buf = self._buffer[0:size] - self._buffer = self._buffer[size:] - return buf - - # Loop until we reach the requested data size (or forever if all data was requested). - while (len(self._buffer) < size) or (size == -1): - exception = None - try: - result = self._queue.get(block=True, timeout=self._timeout) - exception = result.exception - except Empty as em: - exception = em - - # Check for any exceptions raised by the queue process. - if exception is not None: - self._closed = True - self.raised_exception = True - - # Fire off the exception to any registered handlers. If no handlers were registered, - # then raise the exception locally. 
- handled = False - for handler in self._exception_handlers: - handler(exception) - handled = True - - if handled: - return b"" - else: - raise exception - - # Check for no further data. If the QueueProcess has finished producing data, then break - # out of the loop to return the data already acquired. - if result.data is None: - self._done = True - break - - # Add the data to the buffer. - self._buffer += result.data - self._total_size += len(result.data) - - # Return the requested slice of the buffer. - if size == -1: - buf = self._buffer - self._buffer = b"" - return buf - - buf = self._buffer[0:size] - self._buffer = self._buffer[size:] - return buf - - def flush(self): - # Purposefully not implemented. - pass - - def close(self): - self._closed = True diff --git a/util/registry/queueprocess.py b/util/registry/queueprocess.py deleted file mode 100644 index eab652459..000000000 --- a/util/registry/queueprocess.py +++ /dev/null @@ -1,81 +0,0 @@ -from multiprocessing import Process, Queue -from collections import namedtuple - -import logging -import multiprocessing -import time -import sys -import traceback - - -logger = multiprocessing.log_to_stderr() -logger.setLevel(logging.INFO) - - -class QueueProcess(object): - """ - Helper class which invokes a worker in a process to produce data for one (or more) queues. - """ - - def __init__(self, get_producer, chunk_size, max_size, args, finished=None): - self._get_producer = get_producer - self._queues = [] - self._chunk_size = chunk_size - self._max_size = max_size - self._args = args or [] - self._finished = finished - - def create_queue(self): - """ - Adds a multiprocessing queue to the list of queues. - - Any queues added will have the data produced appended. - """ - queue = Queue(self._max_size // self._chunk_size) - self._queues.append(queue) - return queue - - @staticmethod - def run_process(target, args, finished=None): - def _target(tar, arg, fin): - try: - tar(*args) - finally: - if fin: - fin() - - Process(target=_target, args=(target, args, finished)).start() - - def run(self): - # Important! gipc is used here because normal multiprocessing does not work - # correctly with gevent when we sleep. - args = (self._get_producer, self._queues, self._chunk_size, self._args) - QueueProcess.run_process(_run, args, finished=self._finished) - - -QueueResult = namedtuple("QueueResult", ["data", "exception"]) - - -def _run(get_producer, queues, chunk_size, args): - producer = get_producer(*args) - while True: - try: - result = QueueResult(producer(chunk_size) or None, None) - except Exception as ex: - message = "%s\n%s" % (str(ex), "".join(traceback.format_exception(*sys.exc_info()))) - result = QueueResult(None, Exception(message)) - - for queue in queues: - try: - queue.put(result, block=True) - except Exception as ex: - logger.exception("Exception writing to queue.") - return - - # Terminate the producer loop if the data produced is empty or an exception occurred. - if result.data is None or result.exception is not None: - break - - # Important! This allows the thread that writes the queue data to the pipe - # to do so. Otherwise, this hangs. 
- time.sleep(0) diff --git a/util/registry/streamlayerformat.py b/util/registry/streamlayerformat.py deleted file mode 100644 index 39c05ebfb..000000000 --- a/util/registry/streamlayerformat.py +++ /dev/null @@ -1,76 +0,0 @@ -import os -import tarfile - -import marisa_trie - -from util.registry.aufs import is_aufs_metadata, get_deleted_prefix -from util.registry.tarlayerformat import TarLayerFormat - - -class StreamLayerMerger(TarLayerFormat): - """ - Class which creates a generator of the combined TAR data for a set of Docker layers. - """ - - def __init__(self, get_tar_stream_iterator, path_prefix=None, reporter=None): - super(StreamLayerMerger, self).__init__( - get_tar_stream_iterator, path_prefix, reporter=reporter - ) - - self.path_trie = marisa_trie.Trie() - self.path_encountered = set() - - self.deleted_prefix_trie = marisa_trie.Trie() - self.deleted_prefixes_encountered = set() - - def after_tar_layer(self): - # Update the tries. - self.path_trie = marisa_trie.Trie(self.path_encountered) - self.deleted_prefix_trie = marisa_trie.Trie(self.deleted_prefixes_encountered) - - @staticmethod - def _normalize_path(path): - return os.path.relpath(path, "./") - - def _check_deleted(self, absolute): - ubsolute = str(absolute) - for prefix in self.deleted_prefix_trie.iter_prefixes(ubsolute): - if not os.path.relpath(ubsolute, prefix).startswith(".."): - return True - - return False - - def is_skipped_file(self, filename): - absolute = StreamLayerMerger._normalize_path(filename) - - # Skip metadata. - if is_aufs_metadata(absolute): - return True - - # Check if the file is under a deleted path. - if self._check_deleted(absolute): - return True - - # Check if this file has already been encountered somewhere. If so, - # skip it. - ubsolute = str(absolute) - if ubsolute in self.path_trie: - return True - - return False - - def should_append_file(self, filename): - if self.is_skipped_file(filename): - return False - - absolute = StreamLayerMerger._normalize_path(filename) - - # Add any prefix of deleted paths to the prefix list. - deleted_prefix = get_deleted_prefix(absolute) - if deleted_prefix is not None: - self.deleted_prefixes_encountered.add(deleted_prefix) - return False - - # Otherwise, add the path to the encountered list and return it. - self.path_encountered.add(absolute) - return True diff --git a/util/registry/tarlayerformat.py b/util/registry/tarlayerformat.py deleted file mode 100644 index 08d7bd752..000000000 --- a/util/registry/tarlayerformat.py +++ /dev/null @@ -1,202 +0,0 @@ -import os -import tarfile -import copy - -from abc import ABCMeta, abstractmethod -from collections import defaultdict -from six import add_metaclass - -from util.abchelpers import nooper - - -class TarLayerReadException(Exception): - """ - Exception raised when reading a layer has failed. - """ - - pass - - -# 9MB (+ padding below) so that it matches the 10MB expected by Gzip. -CHUNK_SIZE = 1024 * 1024 * 9 - - -@add_metaclass(ABCMeta) -class TarLayerFormatterReporter(object): - @abstractmethod - def report_pass(self, stream_count): - """ - Reports a formatting pass. - """ - pass - - -@nooper -class NoopReporter(TarLayerFormatterReporter): - pass - - -@add_metaclass(ABCMeta) -class TarLayerFormat(object): - """ - Class which creates a generator of the combined TAR data. 
- """ - - def __init__(self, tar_stream_getter_iterator, path_prefix=None, reporter=None): - self.tar_stream_getter_iterator = tar_stream_getter_iterator - self.path_prefix = path_prefix or "" - self.reporter = reporter or NoopReporter() - - def get_generator(self): - for stream_getter in self.tar_stream_getter_iterator(): - current_tar_stream = stream_getter() - - # Read the current TAR. If it is empty, we just continue - # to the next one. - tar_file = TarLayerFormat._tar_file_from_stream(current_tar_stream) - if not tar_file: - continue - - # For each of the tar entries, yield them IF and ONLY IF we have not - # encountered the path before. - dangling_hard_links = defaultdict(list) - try: - for tar_info in tar_file: - if not self.should_append_file(tar_info.name): - continue - - # Note: We use a copy here because we need to make sure we copy over all the internal - # data of the tar header. We cannot use frombuf(tobuf()), however, because it doesn't - # properly handle large filenames. - clone = copy.deepcopy(tar_info) - clone.name = os.path.join(self.path_prefix, clone.name) - - # If the entry is a *hard* link, then prefix it as well. Soft links are relative. - if clone.linkname and clone.type == tarfile.LNKTYPE: - # If the entry is a dangling hard link, we skip here. Dangling hard links will be handled - # in a second pass. - if self.is_skipped_file(tar_info.linkname): - dangling_hard_links[tar_info.linkname].append(tar_info) - continue - - clone.linkname = os.path.join(self.path_prefix, clone.linkname) - - # Yield the tar header. - yield clone.tobuf() - - # Try to extract any file contents for the tar. If found, we yield them as well. - if tar_info.isreg(): - for block in TarLayerFormat._emit_file(tar_file, tar_info): - yield block - except UnicodeDecodeError as ude: - raise TarLayerReadException("Decode error: %s" % ude) - - # Close the layer stream now that we're done with it. - tar_file.close() - - # If there are any dangling hard links, open a new stream and retarget the dangling hard - # links to a new copy of the contents, which will be placed under the *first* dangling hard - # link's name. - if len(dangling_hard_links) > 0: - tar_file = TarLayerFormat._tar_file_from_stream(stream_getter()) - if not tar_file: - raise TarLayerReadException("Could not re-read tar layer") - - for tar_info in tar_file: - # If we encounter a file that holds the data for a dangling link, - # emit it under the name of the first dangling hard link. All other - # dangling hard links will be retargeted to this first name. - if tar_info.name in dangling_hard_links: - first_dangling = dangling_hard_links[tar_info.name][0] - - # Copy the first dangling hard link, change it to a normal file, - # and emit the deleted file's contents for it. - clone = copy.deepcopy(first_dangling) - clone.name = os.path.join(self.path_prefix, first_dangling.name) - clone.type = tar_info.type - clone.size = tar_info.size - clone.pax_headers = tar_info.pax_headers - yield clone.tobuf() - - for block in TarLayerFormat._emit_file(tar_file, tar_info): - yield block - - elif ( - tar_info.type == tarfile.LNKTYPE - and tar_info.linkname in dangling_hard_links - and not self.is_skipped_file(tar_info.name) - ): - # Retarget if necessary. All dangling hard links (but the first) will - # need to be retargeted. - first_dangling = dangling_hard_links[tar_info.linkname][0] - if tar_info.name == first_dangling.name: - # Skip; the first dangling is handled above. - continue - - # Retarget the hard link to the first dangling hard link. 
- clone = copy.deepcopy(tar_info) - clone.name = os.path.join(self.path_prefix, clone.name) - clone.linkname = os.path.join(self.path_prefix, first_dangling.name) - yield clone.tobuf() - - # Close the layer stream now that we're done with it. - tar_file.close() - - # Conduct any post-tar work. - self.after_tar_layer() - self.reporter.report_pass(2 if len(dangling_hard_links) > 0 else 1) - - # Last two records are empty in TAR spec. - yield b"\0" * 512 - yield b"\0" * 512 - - @abstractmethod - def is_skipped_file(self, filename): - """ - Returns true if the file with the given name will be skipped during append. - """ - pass - - @abstractmethod - def should_append_file(self, filename): - """ - Returns true if the file with the given name should be appended when producing the new TAR. - """ - pass - - @abstractmethod - def after_tar_layer(self): - """ - Invoked after a TAR layer is added, to do any post-add work. - """ - pass - - @staticmethod - def _tar_file_from_stream(stream): - tar_file = None - try: - tar_file = tarfile.open(mode="r|*", fileobj=stream) - except tarfile.ReadError as re: - if str(re) != "empty file": - raise TarLayerReadException("Could not read layer") - - return tar_file - - @staticmethod - def _emit_file(tar_file, tar_info): - file_stream = tar_file.extractfile(tar_info) - if file_stream is not None: - length = 0 - while True: - current_block = file_stream.read(CHUNK_SIZE) - if not len(current_block): - break - - yield current_block - length += len(current_block) - - file_stream.close() - - # Files must be padding to 512 byte multiples. - if length % 512 != 0: - yield b"\0" * (512 - (length % 512)) diff --git a/util/registry/test/test_queuefile.py b/util/registry/test/test_queuefile.py deleted file mode 100644 index 0595121ac..000000000 --- a/util/registry/test/test_queuefile.py +++ /dev/null @@ -1,118 +0,0 @@ -import os - -import pytest - -from util.registry.queueprocess import QueueResult -from util.registry.queuefile import QueueFile - - -class FakeQueue(object): - def __init__(self): - self.items = [] - - def get(self, block, timeout=None): - return self.items.pop(0) - - def put(self, data): - self.items.append(data) - - -def test_basic(): - queue = FakeQueue() - queue.put(QueueResult(b"hello world", None)) - queue.put(QueueResult(b"! how goes there?", None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - assert queuefile.read() == b"hello world! how goes there?" - - -def test_chunk_reading(): - queue = FakeQueue() - queue.put(QueueResult(b"hello world", None)) - queue.put(QueueResult(b"! how goes there?", None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - data = b"" - - while True: - result = queuefile.read(size=2) - if not result: - break - - data += result - - assert data == b"hello world! how goes there?" - - -def test_unhandled_exception(): - queue = FakeQueue() - queue.put(QueueResult(b"hello world", None)) - queue.put(QueueResult(None, IOError("some exception"))) - queue.put(QueueResult(b"! how goes there?", None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - - with pytest.raises(IOError): - queuefile.read(size=12) - - -def test_handled_exception(): - queue = FakeQueue() - queue.put(QueueResult(b"hello world", None)) - queue.put(QueueResult(None, IOError("some exception"))) - queue.put(QueueResult(b"! 
how goes there?", None)) - queue.put(QueueResult(None, None)) - - ex_found = [None] - - def handler(ex): - ex_found[0] = ex - - queuefile = QueueFile(queue) - queuefile.add_exception_handler(handler) - queuefile.read(size=12) - - assert ex_found[0] is not None - - -def test_binary_data(): - queue = FakeQueue() - - # Generate some binary data. - binary_data = os.urandom(1024) - queue.put(QueueResult(binary_data, None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - found_data = b"" - while True: - current_data = queuefile.read(size=37) - if len(current_data) == 0: - break - - found_data = found_data + current_data - - assert found_data == binary_data - - -def test_empty_data(): - queue = FakeQueue() - - # Generate some empty binary data. - binary_data = b"\0" * 1024 - queue.put(QueueResult(binary_data, None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - found_data = b"" - while True: - current_data = queuefile.read(size=37) - if len(current_data) == 0: - break - - found_data = found_data + current_data - - assert found_data == binary_data diff --git a/util/registry/test/test_streamlayerformat.py b/util/registry/test/test_streamlayerformat.py deleted file mode 100644 index d329f3e9b..000000000 --- a/util/registry/test/test_streamlayerformat.py +++ /dev/null @@ -1,469 +0,0 @@ -import tarfile - -import pytest - -from io import BytesIO -from util.registry.streamlayerformat import StreamLayerMerger -from util.registry.aufs import AUFS_WHITEOUT -from util.registry.tarlayerformat import TarLayerReadException - - -def create_layer(*file_pairs): - output = BytesIO() - with tarfile.open(fileobj=output, mode="w:gz") as tar: - for current_filename, current_contents in file_pairs: - if current_contents is None: - # This is a deleted file. 
- if current_filename.endswith("/"): - current_filename = current_filename[:-1] - - parts = current_filename.split("/") - if len(parts) > 1: - current_filename = "/".join(parts[:-1]) + "/" + AUFS_WHITEOUT + parts[-1] - else: - current_filename = AUFS_WHITEOUT + parts[-1] - - current_contents = b"" - - if current_contents.startswith(b"linkto:"): - info = tarfile.TarInfo(name=current_filename) - info.linkname = current_contents[len(b"linkto:") :].decode("utf-8") - info.type = tarfile.LNKTYPE - tar.addfile(info) - else: - info = tarfile.TarInfo(name=current_filename) - info.size = len(current_contents) - tar.addfile(info, fileobj=BytesIO(current_contents)) - - return output.getvalue() - - -def create_empty_layer(): - return b"" - - -def squash_layers(layers, path_prefix=None): - def getter_for_layer(layer): - return lambda: BytesIO(layer) - - def layer_stream_getter(): - return [getter_for_layer(layer) for layer in layers] - - merger = StreamLayerMerger(layer_stream_getter, path_prefix=path_prefix) - merged_data = b"".join(list(merger.get_generator())) - return merged_data - - -def assertHasFile(squashed, filename, contents): - with tarfile.open(fileobj=BytesIO(squashed), mode="r:*") as tar: - member = tar.getmember(filename) - assert contents == b"\n".join(tar.extractfile(member).readlines()) - - -def assertDoesNotHaveFile(squashed, filename): - with tarfile.open(fileobj=BytesIO(squashed), mode="r:*") as tar: - try: - member = tar.getmember(filename) - except Exception as ex: - return - - assert False, "Filename %s found" % filename - - -def test_single_layer(): - tar_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - squashed = squash_layers([tar_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "another_file", b"bar") - assertHasFile(squashed, "third_file", b"meh") - - -def test_multiple_layers(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("top_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "another_file", b"bar") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "top_file", b"top") - - -def test_multiple_layers_dot(): - second_layer = create_layer( - ("./some_file", b"foo"), ("another_file", b"bar"), ("./third_file", b"meh") - ) - - first_layer = create_layer(("top_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "./some_file", b"foo") - assertHasFile(squashed, "another_file", b"bar") - assertHasFile(squashed, "./third_file", b"meh") - assertHasFile(squashed, "top_file", b"top") - - -def test_multiple_layers_overwrite(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "another_file", b"top") - - -def test_multiple_layers_overwrite_base_dot(): - second_layer = create_layer( - ("some_file", b"foo"), ("./another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, 
"third_file", b"meh") - assertHasFile(squashed, "another_file", b"top") - assertDoesNotHaveFile(squashed, "./another_file") - - -def test_multiple_layers_overwrite_top_dot(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("./another_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "./another_file", b"top") - assertDoesNotHaveFile(squashed, "another_file") - - -def test_deleted_file(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertDoesNotHaveFile(squashed, "another_file") - - -def test_deleted_readded_file(): - third_layer = create_layer(("another_file", b"bar")) - - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", None), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", b"newagain")) - - squashed = squash_layers([first_layer, second_layer, third_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "another_file", b"newagain") - - -def test_deleted_in_lower_layer(): - third_layer = create_layer(("deleted_file", b"bar")) - - second_layer = create_layer( - ("some_file", b"foo"), ("deleted_file", None), ("third_file", b"meh") - ) - - first_layer = create_layer(("top_file", b"top")) - - squashed = squash_layers([first_layer, second_layer, third_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "top_file", b"top") - assertDoesNotHaveFile(squashed, "deleted_file") - - -def test_deleted_in_lower_layer_with_added_dot(): - third_layer = create_layer(("./deleted_file", b"something")) - - second_layer = create_layer(("deleted_file", None)) - - squashed = squash_layers([second_layer, third_layer]) - assertDoesNotHaveFile(squashed, "deleted_file") - - -def test_deleted_in_lower_layer_with_deleted_dot(): - third_layer = create_layer(("./deleted_file", b"something")) - - second_layer = create_layer(("./deleted_file", None)) - - squashed = squash_layers([second_layer, third_layer]) - assertDoesNotHaveFile(squashed, "deleted_file") - - -def test_directory(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/another_file", b"bar")) - - first_layer = create_layer(("foo/some_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "foo/some_file", b"top") - assertHasFile(squashed, "foo/another_file", b"bar") - - -def test_sub_directory(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/bar/another_file", b"bar")) - - first_layer = create_layer(("foo/some_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "foo/some_file", b"top") - assertHasFile(squashed, "foo/bar/another_file", b"bar") - - -def test_delete_directory(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/another_file", b"bar")) - - first_layer = create_layer(("foo/", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertDoesNotHaveFile(squashed, "foo/some_file") - 
assertDoesNotHaveFile(squashed, "foo/another_file") - - -def test_delete_sub_directory(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/bar/another_file", b"bar")) - - first_layer = create_layer(("foo/bar/", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertDoesNotHaveFile(squashed, "foo/bar/another_file") - assertHasFile(squashed, "foo/some_file", b"foo") - - -def test_delete_sub_directory_with_dot(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/bar/another_file", b"bar")) - - first_layer = create_layer(("./foo/bar/", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertDoesNotHaveFile(squashed, "foo/bar/another_file") - assertHasFile(squashed, "foo/some_file", b"foo") - - -def test_delete_sub_directory_with_subdot(): - second_layer = create_layer(("./foo/some_file", b"foo"), ("./foo/bar/another_file", b"bar")) - - first_layer = create_layer(("foo/bar/", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertDoesNotHaveFile(squashed, "foo/bar/another_file") - assertDoesNotHaveFile(squashed, "./foo/bar/another_file") - assertHasFile(squashed, "./foo/some_file", b"foo") - - -def test_delete_directory_recreate(): - third_layer = create_layer(("foo/some_file", b"foo"), ("foo/another_file", b"bar")) - - second_layer = create_layer(("foo/", None)) - - first_layer = create_layer(("foo/some_file", b"baz")) - - squashed = squash_layers([first_layer, second_layer, third_layer]) - - assertHasFile(squashed, "foo/some_file", b"baz") - assertDoesNotHaveFile(squashed, "foo/another_file") - - -def test_delete_directory_prefix(): - third_layer = create_layer(("foobar/some_file", b"foo"), ("foo/another_file", b"bar")) - - second_layer = create_layer(("foo/", None)) - - squashed = squash_layers([second_layer, third_layer]) - - assertHasFile(squashed, "foobar/some_file", b"foo") - assertDoesNotHaveFile(squashed, "foo/another_file") - - -def test_delete_directory_pre_prefix(): - third_layer = create_layer(("foobar/baz/some_file", b"foo"), ("foo/another_file", b"bar")) - - second_layer = create_layer(("foo/", None)) - - squashed = squash_layers([second_layer, third_layer]) - - assertHasFile(squashed, "foobar/baz/some_file", b"foo") - assertDoesNotHaveFile(squashed, "foo/another_file") - - -def test_delete_root_directory(): - third_layer = create_layer(("build/first_file", b"foo"), ("build/second_file", b"bar")) - - second_layer = create_layer(("build", None)) - - squashed = squash_layers([second_layer, third_layer]) - - assertDoesNotHaveFile(squashed, "build/first_file") - assertDoesNotHaveFile(squashed, "build/second_file") - - -def test_tar_empty_layer(): - third_layer = create_layer(("build/first_file", b"foo"), ("build/second_file", b"bar")) - - empty_layer = create_layer() - - squashed = squash_layers([empty_layer, third_layer]) - - assertHasFile(squashed, "build/first_file", b"foo") - assertHasFile(squashed, "build/second_file", b"bar") - - -def test_data_empty_layer(): - third_layer = create_layer(("build/first_file", b"foo"), ("build/second_file", b"bar")) - - empty_layer = create_empty_layer() - - squashed = squash_layers([empty_layer, third_layer]) - - assertHasFile(squashed, "build/first_file", b"foo") - assertHasFile(squashed, "build/second_file", b"bar") - - -def test_broken_layer(): - third_layer = create_layer(("build/first_file", b"foo"), ("build/second_file", b"bar")) - - broken_layer = b"not valid data" - - with pytest.raises(TarLayerReadException): - squash_layers([broken_layer, 
third_layer]) - - -def test_single_layer_with_prefix(): - tar_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - squashed = squash_layers([tar_layer], path_prefix="foo/") - - assertHasFile(squashed, "foo/some_file", b"foo") - assertHasFile(squashed, "foo/another_file", b"bar") - assertHasFile(squashed, "foo/third_file", b"meh") - - -def test_multiple_layers_overwrite_with_prefix(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", b"top")) - - squashed = squash_layers([first_layer, second_layer], path_prefix="foo/") - - assertHasFile(squashed, "foo/some_file", b"foo") - assertHasFile(squashed, "foo/third_file", b"meh") - assertHasFile(squashed, "foo/another_file", b"top") - - -def test_superlong_filename(): - tar_layer = create_layer( - ( - "this_is_the_filename_that_never_ends_it_goes_on_and_on_my_friend_some_people_started", - b"meh", - ) - ) - - squashed = squash_layers([tar_layer], path_prefix="foo/") - assertHasFile( - squashed, - "foo/this_is_the_filename_that_never_ends_it_goes_on_and_on_my_friend_some_people_started", - b"meh", - ) - - -def test_superlong_prefix(): - tar_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - squashed = squash_layers( - [tar_layer], - path_prefix="foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/", - ) - - assertHasFile( - squashed, - "foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/some_file", - b"foo", - ) - assertHasFile( - squashed, - "foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/another_file", - b"bar", - ) - assertHasFile( - squashed, - "foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/third_file", - b"meh", - ) - - -def test_hardlink_to_deleted_file(): - first_layer = create_layer( - ("tobedeletedfile", b"somecontents"), - ("link_to_deleted_file", b"linkto:tobedeletedfile"), - ("third_file", b"meh"), - ) - - second_layer = create_layer(("tobedeletedfile", None)) - - squashed = squash_layers([second_layer, first_layer], path_prefix="foo/") - - assertHasFile(squashed, "foo/third_file", b"meh") - assertHasFile(squashed, "foo/link_to_deleted_file", b"somecontents") - assertDoesNotHaveFile(squashed, "foo/tobedeletedfile") - - -def test_multiple_hardlink_to_deleted_file(): - first_layer = create_layer( - ("tobedeletedfile", b"somecontents"), - ("link_to_deleted_file", b"linkto:tobedeletedfile"), - ("another_link_to_deleted_file", b"linkto:tobedeletedfile"), - ("third_file", b"meh"), - ) - - second_layer = create_layer(("tobedeletedfile", None)) - - squashed = squash_layers([second_layer, first_layer], path_prefix="foo/") - - assertHasFile(squashed, "foo/third_file", b"meh") - assertHasFile(squashed, "foo/link_to_deleted_file", b"somecontents") - assertHasFile(squashed, "foo/another_link_to_deleted_file", b"somecontents") - - assertDoesNotHaveFile(squashed, "foo/tobedeletedfile") diff --git a/util/repomirror/api.py b/util/repomirror/api.py index f76d3148b..2037026a7 100644 --- a/util/repomirror/api.py +++ b/util/repomirror/api.py @@ -96,7 +96,7 @@ class RepoMirrorAPIInterface(object): Posts the given repository to the repo mirror for processing, blocking until complete. 
Returns the analysis version on success or raises an exception deriving from - AnalyzeLayerException on failure. Callers should handle all cases of AnalyzeLayerException. + RepoMirrorException on failure. Callers should handle all cases of RepoMirrorException. """ pass diff --git a/util/secscan/analyzer.py b/util/secscan/analyzer.py deleted file mode 100644 index 91dc645a2..000000000 --- a/util/secscan/analyzer.py +++ /dev/null @@ -1,239 +0,0 @@ -import logging -import logging.config - -from collections import defaultdict - -import features - -from data.database import ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION, Image -from data.model.oci.tag import filter_tags_have_repository_event, get_tags_for_legacy_image -from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base -from notifications import spawn_notification -from util.secscan import PRIORITY_LEVELS -from util.secscan.api import ( - APIRequestFailure, - AnalyzeLayerException, - MissingParentLayerException, - InvalidLayerException, - AnalyzeLayerRetryException, -) -from util.morecollections import AttrDict - - -logger = logging.getLogger(__name__) - - -class PreemptedException(Exception): - """ - Exception raised if another worker analyzed the image before this worker was able to do so. - """ - - -class LayerAnalyzer(object): - """ - Helper class to perform analysis of a layer via the security scanner. - """ - - def __init__(self, config, api): - self._api = api - self._target_version = config.get("SECURITY_SCANNER_ENGINE_VERSION_TARGET", 2) - - def analyze_recursively(self, layer): - """ - Analyzes a layer and all its parents. - - Raises a PreemptedException if the analysis was preempted by another worker. - """ - try: - self._analyze_recursively_and_check(layer) - except MissingParentLayerException: - # The parent layer of this layer was missing. Force a reanalyze. - try: - self._analyze_recursively_and_check(layer, force_parents=True) - except MissingParentLayerException: - # Parent is still missing... mark the layer as invalid. - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - - def _analyze_recursively_and_check(self, layer, force_parents=False): - """ - Analyzes a layer and all its parents, optionally forcing parents to be reanalyzed, and - checking for various exceptions that can occur during analysis. - """ - try: - self._analyze_recursively(layer, force_parents=force_parents) - except InvalidLayerException: - # One of the parent layers is invalid, so this layer is invalid as well. - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - except AnalyzeLayerRetryException: - # Something went wrong when trying to analyze the layer, but we should retry, so leave - # the layer unindexed. Another worker will come along and handle it. - raise APIRequestFailure - except MissingParentLayerException: - # Pass upward, as missing parent is handled in the analyze_recursively method. - raise - except AnalyzeLayerException: - # Something went wrong when trying to analyze the layer and we cannot retry, so mark the - # layer as invalid. - logger.exception( - "Got exception when trying to analyze layer %s via security scanner", layer.id - ) - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - - def _analyze_recursively(self, layer, force_parents=False): - # Check if there is a parent layer that needs to be analyzed. 
- if layer.parent_id and ( - force_parents or layer.parent.security_indexed_engine < self._target_version - ): - try: - base_query = get_image_with_storage_and_parent_base() - parent_layer = base_query.where(Image.id == layer.parent_id).get() - except Image.DoesNotExist: - logger.warning( - "Image %s has Image %s as parent but doesn't exist.", layer.id, layer.parent_id - ) - raise AnalyzeLayerException("Parent image not found") - - self._analyze_recursively(parent_layer, force_parents=force_parents) - - # Analyze the layer itself. - self._analyze(layer, force_parents=force_parents) - - def _analyze(self, layer, force_parents=False): - """ - Analyzes a single layer. - - Return a tuple of two bools: - - The first one tells us if we should evaluate its children. - - The second one is set to False when another worker pre-empted the candidate's analysis - for us. - """ - # If the parent couldn't be analyzed with the target version or higher, we can't analyze - # this image. Mark it as failed with the current target version. - if not force_parents and ( - layer.parent_id - and not layer.parent.security_indexed - and layer.parent.security_indexed_engine >= self._target_version - ): - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - - # Nothing more to do. - return - - # Make sure the image's storage is not marked as uploading. If so, nothing more to do. - if layer.storage.uploading: - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - - # Nothing more to do. - return - - # Analyze the image. - previously_security_indexed_successfully = layer.security_indexed - previous_security_indexed_engine = layer.security_indexed_engine - - logger.debug("Analyzing layer %s", layer.docker_image_id) - analyzed_version = self._api.analyze_layer(layer) - - logger.debug( - "Analyzed layer %s successfully with version %s", - layer.docker_image_id, - analyzed_version, - ) - - # Mark the image as analyzed. - if not set_secscan_status(layer, True, analyzed_version): - # If the image was previously successfully marked as resolved, then set_secscan_status - # might return False because we're not changing it (since this is a fixup). - if not previously_security_indexed_successfully: - raise PreemptedException - - # If we are the one who've done the job successfully first, then we need to decide if we should - # send notifications. Notifications are sent if: - # 1) This is a new layer - # 2) This is an existing layer that previously did not index properly - # We don't always send notifications as if we are re-indexing a successful layer for a newer - # feature set in the security scanner, notifications will be spammy. - is_new_image = previous_security_indexed_engine == IMAGE_NOT_SCANNED_ENGINE_VERSION - is_existing_image_unindexed = ( - not is_new_image and not previously_security_indexed_successfully - ) - if features.SECURITY_NOTIFICATIONS and (is_new_image or is_existing_image_unindexed): - # Get the tags of the layer we analyzed. - repository_map = defaultdict(list) - event = ExternalNotificationEvent.get(name="vulnerability_found") - - # NOTE: This should really use the registry_model, but as this whole analyzer is - # now deprecated, we'll keep calling into the model directly for the time being. 
- matching = list( - filter_tags_have_repository_event(get_tags_for_legacy_image(layer.id), event) - ) - - for tag in matching: - repository_map[tag.repository_id].append(tag) - - # If there is at least one tag, - # Lookup the vulnerabilities for the image, now that it is analyzed. - if len(repository_map) > 0: - logger.debug("Loading data for layer %s", layer.id) - try: - layer_data = self._api.get_layer_data(layer, include_vulnerabilities=True) - except APIRequestFailure: - raise - - if layer_data is not None: - # Dispatch events for any detected vulnerabilities - logger.debug("Got data for layer %s: %s", layer.id, layer_data) - found_features = layer_data["Layer"].get("Features", []) - for repository_id in repository_map: - tags = repository_map[repository_id] - vulnerabilities = dict() - - # Collect all the vulnerabilities found for the layer under each repository and send - # as a batch notification. - for feature in found_features: - if "Vulnerabilities" not in feature: - continue - - for vulnerability in feature.get("Vulnerabilities", []): - vuln_data = { - "id": vulnerability["Name"], - "description": vulnerability.get("Description", None), - "link": vulnerability.get("Link", None), - "has_fix": "FixedBy" in vulnerability, - # TODO: Change this key name if/when we change the event format. - "priority": vulnerability.get("Severity", "Unknown"), - } - - vulnerabilities[vulnerability["Name"]] = vuln_data - - # TODO: remove when more endpoints have been converted to using - # interfaces - repository = AttrDict( - { - "namespace_name": tags[0].repository.namespace_user.username, - "name": tags[0].repository.name, - } - ) - - repo_vulnerabilities = list(vulnerabilities.values()) - if not repo_vulnerabilities: - continue - - priority_key = lambda v: PRIORITY_LEVELS.get(v["priority"], {}).get( - "index", 100 - ) - repo_vulnerabilities.sort(key=priority_key) - - event_data = { - "tags": [tag.name for tag in tags], - "vulnerabilities": repo_vulnerabilities, - "vulnerability": repo_vulnerabilities[ - 0 - ], # For back-compat with existing events. - } - - spawn_notification(repository, "vulnerability_found", event_data) diff --git a/util/secscan/api.py b/util/secscan/api.py index a860cfaea..e5d6a22f0 100644 --- a/util/secscan/api.py +++ b/util/secscan/api.py @@ -8,14 +8,11 @@ from urllib.parse import urljoin import requests from data import model -from data.database import CloseForLongOperation, TagManifest, Image, Manifest, ManifestLegacyImage -from data.model.storage import get_storage_locations -from data.model.image import get_image_with_storage +from data.database import CloseForLongOperation, Image, Manifest, ManifestLegacyImage from data.registry_model.datatypes import Manifest as ManifestDataType, LegacyImage from util.abchelpers import nooper from util.failover import failover, FailoverException from util.secscan.validator import V2SecurityConfigValidator -from util.security.registry_jwt import generate_bearer_token, build_context_and_subject from _init import CONF_DIR @@ -30,31 +27,6 @@ DEFAULT_HTTP_HEADERS = {"Connection": "close"} logger = logging.getLogger(__name__) -class AnalyzeLayerException(Exception): - """ - Exception raised when a layer fails to analyze due to a request issue. - """ - - -class AnalyzeLayerRetryException(Exception): - """ - Exception raised when a layer fails to analyze due to a request issue, and the request should be - retried. 
- """ - - -class MissingParentLayerException(AnalyzeLayerException): - """ - Exception raised when the parent of the layer is missing from the security scanner. - """ - - -class InvalidLayerException(AnalyzeLayerException): - """ - Exception raised when the layer itself cannot be handled by the security scanner. - """ - - class APIRequestFailure(Exception): """ Exception raised when there is a failure to conduct an API request. @@ -71,11 +43,7 @@ class Non200ResponseException(Exception): self.response = response -_API_METHOD_INSERT = "layers" _API_METHOD_GET_LAYER = "layers/%s" -_API_METHOD_DELETE_LAYER = "layers/%s" -_API_METHOD_MARK_NOTIFICATION_READ = "notifications/%s" -_API_METHOD_GET_NOTIFICATION = "notifications/%s" _API_METHOD_PING = "metrics" @@ -83,18 +51,13 @@ def compute_layer_id(layer): """ Returns the ID for the layer in the security scanner. """ - # NOTE: this is temporary until we switch to Clair V3. - if isinstance(layer, ManifestDataType): - if layer._is_tag_manifest: - layer = TagManifest.get(id=layer._db_id).tag.image - else: - manifest = Manifest.get(id=layer._db_id) - try: - layer = ManifestLegacyImage.get(manifest=manifest).image - except ManifestLegacyImage.DoesNotExist: - return None - elif isinstance(layer, LegacyImage): - layer = Image.get(id=layer._db_id) + assert isinstance(layer, ManifestDataType) + + manifest = Manifest.get(id=layer._db_id) + try: + layer = ManifestLegacyImage.get(manifest=manifest).image + except ManifestLegacyImage.DoesNotExist: + return None assert layer.docker_image_id assert layer.storage.uuid @@ -147,14 +110,6 @@ class SecurityScannerAPIInterface(object): Helper class for talking to the Security Scan service (usually Clair). """ - @abstractmethod - def cleanup_layers(self, layers): - """ - Callback invoked by garbage collection to cleanup any layers that no longer need to be - stored in the security scanner. - """ - pass - @abstractmethod def ping(self): """ @@ -165,23 +120,6 @@ class SecurityScannerAPIInterface(object): """ pass - @abstractmethod - def delete_layer(self, layer): - """ - Calls DELETE on the given layer in the security scanner, removing it from its database. - """ - pass - - @abstractmethod - def analyze_layer(self, layer): - """ - Posts the given layer to the security scanner for analysis, blocking until complete. - - Returns the analysis version on success or raises an exception deriving from - AnalyzeLayerException on failure. Callers should handle all cases of AnalyzeLayerException. - """ - pass - @abstractmethod def check_layer_vulnerable(self, layer_id, cve_name): """ @@ -189,22 +127,6 @@ class SecurityScannerAPIInterface(object): """ pass - @abstractmethod - def get_notification(self, notification_name, layer_limit=100, page=None): - """ - Gets the data for a specific notification, with optional page token. - - Returns a tuple of the data (None on failure) and whether to retry. - """ - pass - - @abstractmethod - def mark_notification_read(self, notification_name): - """ - Marks a security scanner notification as read. - """ - pass - @abstractmethod def get_layer_data(self, layer, include_features=False, include_vulnerabilities=False): """ @@ -242,94 +164,6 @@ class ImplementedSecurityScannerAPI(SecurityScannerAPIInterface): self._target_version = config.get("SECURITY_SCANNER_ENGINE_VERSION_TARGET", 2) self._uri_creator = uri_creator - def _get_image_url_and_auth(self, image): - """ - Returns a tuple of the url and the auth header value that must be used to fetch the layer - data itself. 
- - If the image can't be addressed, we return None. - """ - if self._instance_keys is None: - raise Exception("No Instance keys provided to Security Scanner API") - - path = model.storage.get_layer_path(image.storage) - locations = self._default_storage_locations - - if not self._storage.exists(locations, path): - locations = get_storage_locations(image.storage.uuid) - if not locations or not self._storage.exists(locations, path): - logger.warning( - "Could not find a valid location to download layer %s out of %s", - compute_layer_id(image), - locations, - ) - return None, None - - uri = self._storage.get_direct_download_url(locations, path) - auth_header = None - if uri is None: - # Use the registry API instead, with a signed JWT giving access - repo_name = image.repository.name - namespace_name = image.repository.namespace_user.username - repository_and_namespace = "/".join([namespace_name, repo_name]) - - # Generate the JWT which will authorize this - audience = self._server_hostname - context, subject = build_context_and_subject() - access = [ - {"type": "repository", "name": repository_and_namespace, "actions": ["pull"],} - ] - - auth_token = generate_bearer_token( - audience, subject, context, access, TOKEN_VALIDITY_LIFETIME_S, self._instance_keys - ) - auth_header = "Bearer " + auth_token.decode("ascii") - - uri = self._uri_creator(repository_and_namespace, image.storage.content_checksum) - - return uri, auth_header - - def _new_analyze_request(self, layer): - """ - Create the request body to submit the given layer for analysis. - - If the layer's URL cannot be found, returns None. - """ - layer_id = compute_layer_id(layer) - if layer_id is None: - return None - - url, auth_header = self._get_image_url_and_auth(layer) - if url is None: - return None - - layer_request = { - "Name": layer_id, - "Path": url, - "Format": "Docker", - } - - if auth_header is not None: - layer_request["Headers"] = { - "Authorization": auth_header, - } - - if layer.parent is not None: - if layer.parent.docker_image_id and layer.parent.storage.uuid: - layer_request["ParentName"] = compute_layer_id(layer.parent) - - return { - "Layer": layer_request, - } - - def cleanup_layers(self, layers): - """ - Callback invoked by garbage collection to cleanup any layers that no longer need to be - stored in the security scanner. - """ - for layer in layers: - self.delete_layer(layer) - def ping(self): """ Calls GET on the metrics endpoint of the security scanner to ensure it is running and @@ -355,95 +189,6 @@ class ImplementedSecurityScannerAPI(SecurityScannerAPIInterface): msg = "Exception when trying to connect to security scanner endpoint: %s" % ve raise Exception(msg) - def delete_layer(self, layer): - """ - Calls DELETE on the given layer in the security scanner, removing it from its database. - """ - layer_id = compute_layer_id(layer) - if layer_id is None: - return None - - # NOTE: We are adding an extra check here for the time being just to be sure we're - # not hitting any overlap. 
- docker_image_id, layer_storage_uuid = layer_id.split(".") - if get_image_with_storage(docker_image_id, layer_storage_uuid): - logger.warning("Found shared Docker ID and storage for layer %s", layer_id) - return False - - try: - self._call("DELETE", _API_METHOD_DELETE_LAYER % layer_id) - return True - except Non200ResponseException: - return False - except requests.exceptions.RequestException: - logger.exception("Failed to delete layer: %s", layer_id) - return False - - def analyze_layer(self, layer): - """ - Posts the given layer to the security scanner for analysis, blocking until complete. - - Returns the analysis version on success or raises an exception deriving from - AnalyzeLayerException on failure. Callers should handle all cases of AnalyzeLayerException. - """ - - def _response_json(request, response): - try: - return response.json() - except ValueError: - logger.exception( - "Failed to decode JSON when analyzing layer %s", request["Layer"]["Name"] - ) - raise AnalyzeLayerException - - request = self._new_analyze_request(layer) - if not request: - logger.error("Could not build analyze request for layer %s", layer.id) - raise AnalyzeLayerException - - logger.debug("Analyzing layer %s", request["Layer"]["Name"]) - try: - response = self._call("POST", _API_METHOD_INSERT, body=request) - except requests.exceptions.Timeout: - logger.exception("Timeout when trying to post layer data response for %s", layer.id) - raise AnalyzeLayerRetryException - except requests.exceptions.ConnectionError: - logger.exception( - "Connection error when trying to post layer data response for %s", layer.id - ) - raise AnalyzeLayerRetryException - except (requests.exceptions.RequestException) as re: - logger.exception("Failed to post layer data response for %s: %s", layer.id, re) - raise AnalyzeLayerException - except Non200ResponseException as ex: - message = _response_json(request, ex.response).get("Error").get("Message", "") - logger.warning( - "A warning event occurred when analyzing layer %s (status code %s): %s", - request["Layer"]["Name"], - ex.response.status_code, - message, - ) - # 400 means the layer could not be analyzed due to a bad request. - if ex.response.status_code == 400: - if message == UNKNOWN_PARENT_LAYER_ERROR_MSG: - raise MissingParentLayerException( - "Bad request to security scanner: %s" % message - ) - else: - logger.exception("Got non-200 response for analyze of layer %s", layer.id) - raise AnalyzeLayerException("Bad request to security scanner: %s" % message) - # 422 means that the layer could not be analyzed: - # - the layer could not be extracted (might be a manifest or an invalid .tar.gz) - # - the layer operating system / package manager is unsupported - elif ex.response.status_code == 422: - raise InvalidLayerException - - # Otherwise, it is some other error and we should retry. - raise AnalyzeLayerRetryException - - # Return the parsed API version. - return _response_json(request, response)["Layer"]["IndexedByVersion"] - def check_layer_vulnerable(self, layer_id, cve_name): """ Checks to see if the layer with the given ID is vulnerable to the specified CVE. @@ -459,51 +204,6 @@ class ImplementedSecurityScannerAPI(SecurityScannerAPIInterface): return False - def get_notification(self, notification_name, layer_limit=100, page=None): - """ - Gets the data for a specific notification, with optional page token. - - Returns a tuple of the data (None on failure) and whether to retry. 
- """ - try: - params = {"limit": layer_limit} - - if page is not None: - params["page"] = page - - response = self._call( - "GET", _API_METHOD_GET_NOTIFICATION % notification_name, params=params - ) - json_response = response.json() - except requests.exceptions.Timeout: - logger.exception("Timeout when trying to get notification for %s", notification_name) - return None, True - except requests.exceptions.ConnectionError: - logger.exception( - "Connection error when trying to get notification for %s", notification_name - ) - return None, True - except (requests.exceptions.RequestException, ValueError): - logger.exception("Failed to get notification for %s", notification_name) - return None, False - except Non200ResponseException as ex: - return None, ex.response.status_code != 404 and ex.response.status_code != 400 - - return json_response, False - - def mark_notification_read(self, notification_name): - """ - Marks a security scanner notification as read. - """ - try: - self._call("DELETE", _API_METHOD_MARK_NOTIFICATION_READ % notification_name) - return True - except Non200ResponseException: - return False - except requests.exceptions.RequestException: - logger.exception("Failed to mark notification as read: %s", notification_name) - return False - def get_layer_data(self, layer, include_features=False, include_vulnerabilities=False): """ Returns the layer data for the specified layer. diff --git a/util/secscan/fake.py b/util/secscan/fake.py index 1cba85d35..696762fc2 100644 --- a/util/secscan/fake.py +++ b/util/secscan/fake.py @@ -32,7 +32,6 @@ class FakeSecurityScanner(object): self.hostname = hostname self.index_version = index_version self.layers = {} - self.notifications = {} self.layer_vulns = {} self.ok_layer_id = None @@ -84,42 +83,6 @@ class FakeSecurityScanner(object): """ return layer_id in self.layers - def has_notification(self, notification_id): - """ - Returns whether a notification with the given ID is found in the scanner. - """ - return notification_id in self.notifications - - def add_notification( - self, - old_layer_ids, - new_layer_ids, - old_vuln, - new_vuln, - max_per_page=100, - indexed_old_layer_ids=None, - indexed_new_layer_ids=None, - ): - """ - Adds a new notification over the given sets of layer IDs and vulnerability information, - returning the structural data of the notification created. - """ - notification_id = str(uuid.uuid4()) - if old_vuln is None: - old_vuln = dict(new_vuln) - - self.notifications[notification_id] = dict( - old_layer_ids=old_layer_ids, - new_layer_ids=new_layer_ids, - old_vuln=old_vuln, - new_vuln=new_vuln, - max_per_page=max_per_page, - indexed_old_layer_ids=indexed_old_layer_ids, - indexed_new_layer_ids=indexed_new_layer_ids, - ) - - return self._get_notification_data(notification_id, 0, 100) - def layer_id(self, layer): """ Returns the Quay Security Scanner layer ID for the given layer (Image row). @@ -162,62 +125,6 @@ class FakeSecurityScanner(object): } ) - def _get_notification_data(self, notification_id, page, limit): - """ - Returns the structural data for the notification with the given ID, paginated using the - given page and limit. 
- """ - notification = self.notifications[notification_id] - limit = min(limit, notification["max_per_page"]) - - notification_data = { - "Name": notification_id, - "Created": "1456247389", - "Notified": "1456246708", - "Limit": limit, - } - - start_index = page * limit - end_index = (page + 1) * limit - has_additional_page = False - - if notification.get("old_vuln"): - old_layer_ids = notification["old_layer_ids"] - old_layer_ids = old_layer_ids[start_index:end_index] - has_additional_page = has_additional_page or bool(len(old_layer_ids[end_index - 1 :])) - - notification_data["Old"] = { - "Vulnerability": notification["old_vuln"], - "LayersIntroducingVulnerability": old_layer_ids, - } - - if notification.get("indexed_old_layer_ids", None): - indexed_old_layer_ids = notification["indexed_old_layer_ids"][start_index:end_index] - notification_data["Old"][ - "OrderedLayersIntroducingVulnerability" - ] = indexed_old_layer_ids - - if notification.get("new_vuln"): - new_layer_ids = notification["new_layer_ids"] - new_layer_ids = new_layer_ids[start_index:end_index] - has_additional_page = has_additional_page or bool(len(new_layer_ids[end_index - 1 :])) - - notification_data["New"] = { - "Vulnerability": notification["new_vuln"], - "LayersIntroducingVulnerability": new_layer_ids, - } - - if notification.get("indexed_new_layer_ids", None): - indexed_new_layer_ids = notification["indexed_new_layer_ids"][start_index:end_index] - notification_data["New"][ - "OrderedLayersIntroducingVulnerability" - ] = indexed_new_layer_ids - - if has_additional_page: - notification_data["NextPage"] = str(page + 1) - - return notification_data - def get_endpoints(self): """ Returns the HTTMock endpoint definitions for the fake security scanner. @@ -338,43 +245,6 @@ class FakeSecurityScanner(object): "content": json.dumps({"Layer": self.layers[layer["Name"]],}), } - @urlmatch( - netloc=r"(.*\.)?" + self.hostname, path=r"/v1/notifications/(.+)$", method="DELETE" - ) - def delete_notification(url, _): - notification_id = url.path[len("/v1/notifications/") :] - if notification_id not in self.notifications: - return { - "status_code": 404, - "content": json.dumps({"Error": {"Message": "Unknown notification"}}), - } - - self.notifications.pop(notification_id) - return { - "status_code": 204, - "content": "", - } - - @urlmatch(netloc=r"(.*\.)?" + self.hostname, path=r"/v1/notifications/(.+)$", method="GET") - def get_notification(url, _): - notification_id = url.path[len("/v1/notifications/") :] - if notification_id not in self.notifications: - return { - "status_code": 404, - "content": json.dumps({"Error": {"Message": "Unknown notification"}}), - } - - query_params = urllib.parse.parse_qs(url.query) - limit = int(query_params.get("limit", [2])[0]) - page = int(query_params.get("page", [0])[0]) - - notification_data = self._get_notification_data(notification_id, page, limit) - response = {"Notification": notification_data} - return { - "status_code": 200, - "content": json.dumps(response), - } - @urlmatch(netloc=r"(.*\.)?" 
+ self.hostname, path=r"/v1/metrics$", method="GET") def metrics(url, _): return { @@ -393,8 +263,6 @@ class FakeSecurityScanner(object): get_layer_mock, post_layer_mock, remove_layer_mock, - get_notification, - delete_notification, metrics, response_content, ] diff --git a/util/secscan/notifier.py b/util/secscan/notifier.py deleted file mode 100644 index 9d1db5df1..000000000 --- a/util/secscan/notifier.py +++ /dev/null @@ -1,205 +0,0 @@ -import logging -import sys - -from collections import defaultdict -from enum import Enum - -from data.registry_model import registry_model -from notifications import notification_batch -from util.secscan import PRIORITY_LEVELS -from util.secscan.api import APIRequestFailure -from util.morecollections import AttrDict, StreamingDiffTracker, IndexedStreamingDiffTracker - - -logger = logging.getLogger(__name__) - - -class ProcessNotificationPageResult(Enum): - FINISHED_PAGE = "Finished Page" - FINISHED_PROCESSING = "Finished Processing" - FAILED = "Failed" - - -class SecurityNotificationHandler(object): - """ - Class to process paginated notifications from the security scanner and issue Quay - vulnerability_found notifications for all necessary tags. Callers should initialize, call - process_notification_page_data for each page until it returns FINISHED_PROCESSING or FAILED and, - if succeeded, then call send_notifications to send out the notifications queued. - - NOTE: This is legacy code and should be removed once we're fully moved to Clair V4. - """ - - def __init__(self, legacy_secscan_api, results_per_stream): - self.tags_by_repository_map = defaultdict(set) - self.repository_map = {} - self.check_map = {} - self.layer_ids = set() - self.legacy_secscan_api = legacy_secscan_api - - self.stream_tracker = None - self.results_per_stream = results_per_stream - self.vulnerability_info = None - - def send_notifications(self): - """ - Sends all queued up notifications. - """ - if self.vulnerability_info is None: - return - - new_vuln = self.vulnerability_info - new_severity = PRIORITY_LEVELS.get( - new_vuln.get("Severity", "Unknown"), {"index": sys.maxsize} - ) - - # For each of the tags found, issue a notification. - with notification_batch() as spawn_notification: - for repository_id, tags in self.tags_by_repository_map.items(): - event_data = { - "tags": list(tags), - "vulnerability": { - "id": new_vuln["Name"], - "description": new_vuln.get("Description", None), - "link": new_vuln.get("Link", None), - "priority": new_severity["title"], - "has_fix": "FixedIn" in new_vuln, - }, - } - - spawn_notification( - self.repository_map[repository_id], "vulnerability_found", event_data - ) - - def process_notification_page_data(self, notification_page_data): - """ - Processes the given notification page data to spawn vulnerability notifications as - necessary. - - Returns the status of the processing. 
- """ - if not "New" in notification_page_data: - return self._done() - - new_data = notification_page_data["New"] - old_data = notification_page_data.get("Old", {}) - - new_vuln = new_data["Vulnerability"] - old_vuln = old_data.get("Vulnerability", {}) - - self.vulnerability_info = new_vuln - - new_layer_ids = new_data.get("LayersIntroducingVulnerability", []) - old_layer_ids = old_data.get("LayersIntroducingVulnerability", []) - - new_severity = PRIORITY_LEVELS.get( - new_vuln.get("Severity", "Unknown"), {"index": sys.maxsize} - ) - old_severity = PRIORITY_LEVELS.get( - old_vuln.get("Severity", "Unknown"), {"index": sys.maxsize} - ) - - # Check if the severity of the vulnerability has increased. If so, then we report this - # vulnerability for *all* layers, rather than a difference, as it is important for everyone. - if new_severity["index"] < old_severity["index"]: - # The vulnerability has had its severity increased. Report for *all* layers. - all_layer_ids = set(new_layer_ids) | set(old_layer_ids) - for layer_id in all_layer_ids: - self._report(layer_id) - - if "NextPage" not in notification_page_data: - return self._done() - else: - return ProcessNotificationPageResult.FINISHED_PAGE - - # Otherwise, only send the notification to new layers. To find only the new layers, we - # need to do a streaming diff vs the old layer IDs stream. - - # Check for ordered data. If found, we use the indexed tracker, which is faster and - # more memory efficient. - is_indexed = False - if ( - "OrderedLayersIntroducingVulnerability" in new_data - or "OrderedLayersIntroducingVulnerability" in old_data - ): - - def tuplize(stream): - return [(entry["LayerName"], entry["Index"]) for entry in stream] - - new_layer_ids = tuplize(new_data.get("OrderedLayersIntroducingVulnerability", [])) - old_layer_ids = tuplize(old_data.get("OrderedLayersIntroducingVulnerability", [])) - is_indexed = True - - # If this is the first call, initialize the tracker. - if self.stream_tracker is None: - self.stream_tracker = ( - IndexedStreamingDiffTracker(self._report, self.results_per_stream) - if is_indexed - else StreamingDiffTracker(self._report, self.results_per_stream) - ) - - # Call to add the old and new layer ID streams to the tracker. The tracker itself will - # call _report whenever it has determined a new layer has been found. - self.stream_tracker.push_new(new_layer_ids) - self.stream_tracker.push_old(old_layer_ids) - - # Check to see if there are any additional pages to process. - if "NextPage" not in notification_page_data: - return self._done() - else: - return ProcessNotificationPageResult.FINISHED_PAGE - - def _done(self): - if self.stream_tracker is not None: - # Mark the tracker as done, so that it finishes reporting any outstanding layers. - self.stream_tracker.done() - - # Process all the layers. - if self.vulnerability_info is not None: - if not self._process_layers(): - return ProcessNotificationPageResult.FAILED - - return ProcessNotificationPageResult.FINISHED_PROCESSING - - def _report(self, new_layer_id): - self.layer_ids.add(new_layer_id) - - def _chunk(self, pairs, chunk_size): - start_index = 0 - while start_index < len(pairs): - yield pairs[start_index:chunk_size] - start_index += chunk_size - - def _process_layers(self): - cve_id = self.vulnerability_info["Name"] - - # Builds the pairs of layer ID and storage uuid. - pairs = [tuple(layer_id.split(".", 2)) for layer_id in self.layer_ids] - - # Find the matching tags. 
- for current_pairs in self._chunk(pairs, 50): - tags = list(registry_model.yield_tags_for_vulnerability_notification(current_pairs)) - for tag in tags: - # Verify that the tag's *top layer* has the vulnerability. - if not tag.layer_id in self.check_map: - logger.debug("Checking if layer %s is vulnerable to %s", tag.layer_id, cve_id) - try: - self.check_map[ - tag.layer_id - ] = self.legacy_secscan_api.check_layer_vulnerable(tag.layer_id, cve_id) - except APIRequestFailure: - return False - - logger.debug( - "Result of layer %s is vulnerable to %s check: %s", - tag.layer_id, - cve_id, - self.check_map[tag.layer_id], - ) - - if self.check_map[tag.layer_id]: - # Add the vulnerable tag to the list. - self.tags_by_repository_map[tag.repository.id].add(tag.name) - self.repository_map[tag.repository.id] = tag.repository - - return True diff --git a/util/secscan/v4/test/test_secscan.py b/util/secscan/v4/test/test_secscan.py index fc3ecf5cc..d91116fc1 100644 --- a/util/secscan/v4/test/test_secscan.py +++ b/util/secscan/v4/test/test_secscan.py @@ -18,9 +18,8 @@ from app import app def manifest_for(namespace, repository, tagname): repository_ref = registry_model.lookup_repository(namespace, repository) - tag = registry_model.get_repo_tag(repository_ref, tagname, include_legacy_image=True) - - return registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, tagname) + return registry_model.get_manifest_for_tag(tag) @pytest.fixture() diff --git a/util/security/signing.py b/util/security/signing.py deleted file mode 100644 index 2eb7c6880..000000000 --- a/util/security/signing.py +++ /dev/null @@ -1,87 +0,0 @@ -import gpg -import features -import logging -from io import BytesIO - - -logger = logging.getLogger(__name__) - - -class GPG2Signer(object): - """ - Helper class for signing data using GPG2. - """ - - def __init__(self, config, config_provider): - if not config.get("GPG2_PRIVATE_KEY_NAME"): - raise Exception("Missing configuration key GPG2_PRIVATE_KEY_NAME") - - if not config.get("GPG2_PRIVATE_KEY_FILENAME"): - raise Exception("Missing configuration key GPG2_PRIVATE_KEY_FILENAME") - - if not config.get("GPG2_PUBLIC_KEY_FILENAME"): - raise Exception("Missing configuration key GPG2_PUBLIC_KEY_FILENAME") - - self._ctx = gpg.Context() - self._ctx.armor = True - self._private_key_name = config["GPG2_PRIVATE_KEY_NAME"] - self._public_key_filename = config["GPG2_PUBLIC_KEY_FILENAME"] - self._config_provider = config_provider - - if not config_provider.volume_file_exists(config["GPG2_PRIVATE_KEY_FILENAME"]): - raise Exception("Missing key file %s" % config["GPG2_PRIVATE_KEY_FILENAME"]) - - with config_provider.get_volume_file(config["GPG2_PRIVATE_KEY_FILENAME"], mode="rb") as fp: - self._ctx.op_import(fp) - - @property - def name(self): - return "gpg2" - - def open_public_key_file(self): - return self._config_provider.get_volume_file(self._public_key_filename, mode="rb") - - def detached_sign(self, stream): - """ - Signs the given byte-like stream, returning the signature. 
- """ - ctx = self._ctx - try: - ctx.signers = [ctx.get_key(self._private_key_name, 0)] - except: - raise Exception("Invalid private key name") - - data = stream.read() - if not isinstance(data, bytes): - raise TypeError("Stream is not byte-like") - - sign_res = ctx.sign(data, mode=gpg.constants.sig.mode.DETACH) - return sign_res[0] - - -class Signer(object): - def __init__(self, app=None, config_provider=None): - self.app = app - if app is not None: - self.state = self.init_app(app, config_provider) - else: - self.state = None - - def init_app(self, app, config_provider): - preference = app.config.get("SIGNING_ENGINE", None) - if preference is None: - return None - - if not features.ACI_CONVERSION: - return None - - try: - return SIGNING_ENGINES[preference](app.config, config_provider) - except: - logger.exception("Could not initialize signing engine") - - def __getattr__(self, name): - return getattr(self.state, name, None) - - -SIGNING_ENGINES = {"gpg2": GPG2Signer} diff --git a/util/security/test/test_signing.py b/util/security/test/test_signing.py deleted file mode 100644 index 2965dae02..000000000 --- a/util/security/test/test_signing.py +++ /dev/null @@ -1,28 +0,0 @@ -import pytest -from io import StringIO, BytesIO - -from app import app, config_provider -from util.security.signing import Signer - - -@pytest.fixture(params=["gpg2"]) -def signer(request): - app.config["SIGNING_ENGINE"] = request.param - return Signer(app, config_provider) - - -@pytest.mark.parametrize( - "data, expected_exception", - [ - ("Unicode strings not allowed", AttributeError), - (StringIO("Not OK, because this does not implement buffer protocol"), TypeError), - (b"bytes are not ok. It should be wrapped in a file-like object", AttributeError), - (BytesIO(b"Thisisfine"), None), - ], -) -def test_detached_sign(data, expected_exception, signer): - if expected_exception is not None: - with pytest.raises(expected_exception): - signer.detached_sign(data) - else: - signer.detached_sign(data) diff --git a/util/test/test_workers.py b/util/test/test_workers.py index 9a8796043..78fb8a493 100644 --- a/util/test/test_workers.py +++ b/util/test/test_workers.py @@ -28,10 +28,6 @@ from util.workers import get_worker_count ("registry", {"WORKER_COUNT": 1,}, [0, 1], 10, 8, 64, 8), # Override always uses specific first. ("registry", {"WORKER_COUNT_REGISTRY": 120, "WORKER_COUNT": 12,}, [0, 1], 10, 8, 64, 120), - # Non-matching override. - ("verbs", {"WORKER_COUNT_REGISTRY": 120,}, [0, 1], 10, 8, 64, 20), - # Zero worker count (use defaults). - ("verbs", {"WORKER_COUNT": 0,}, [0, 1], 10, 8, 64, 8), ], ) def test_get_worker_count( diff --git a/util/vendor/__init__.py b/util/vendor/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/util/vendor/paxtarfile.py b/util/vendor/paxtarfile.py deleted file mode 100644 index 5e13c9303..000000000 --- a/util/vendor/paxtarfile.py +++ /dev/null @@ -1,2885 +0,0 @@ -# -*- coding: iso-8859-1 -*- - -# This version of tarfile was taken from python 2.7.10, and amended -# to fix a problem trying to decode non-text header fields present -# in some tar files using pax headers and/or extended attributes. - -# ------------------------------------------------------------------- -# tarfile.py -# ------------------------------------------------------------------- -# Copyright (C) 2002 Lars Gustäbel -# All rights reserved. 
-# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without -# restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following -# conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -""" -Read from and write to tar format archives. -""" - -__version__ = "$Revision: 85213 $" -# $Source$ - -version = "0.9.0" -__author__ = "Lars Gustäbel (lars@gustaebel.de)" -__date__ = "$Date$" -__cvsid__ = "$Id$" -__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend." - -# --------- -# Imports -# --------- -from builtins import open as bltn_open -import sys -import os -import shutil -import stat -import errno -import time -import struct -import copy -import re -import operator - -try: - import grp, pwd -except ImportError: - grp = pwd = None - -# from tarfile import * -__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] - -# --------------------------------------------------------- -# tar constants -# --------------------------------------------------------- -NUL = "\0" # the null character -BLOCKSIZE = 512 # length of processing blocks -RECORDSIZE = BLOCKSIZE * 20 # length of records -GNU_MAGIC = "ustar \0" # magic gnu tar string -POSIX_MAGIC = "ustar\x0000" # magic posix tar string - -LENGTH_NAME = 100 # maximum length of a filename -LENGTH_LINK = 100 # maximum length of a linkname -LENGTH_PREFIX = 155 # maximum length of the prefix field - -REGTYPE = "0" # regular file -AREGTYPE = "\0" # regular file -LNKTYPE = "1" # link (inside tarfile) -SYMTYPE = "2" # symbolic link -CHRTYPE = "3" # character special device -BLKTYPE = "4" # block special device -DIRTYPE = "5" # directory -FIFOTYPE = "6" # fifo special device -CONTTYPE = "7" # contiguous file - -GNUTYPE_LONGNAME = "L" # GNU tar longname -GNUTYPE_LONGLINK = "K" # GNU tar longlink -GNUTYPE_SPARSE = "S" # GNU tar sparse file - -XHDTYPE = "x" # POSIX.1-2001 extended header -XGLTYPE = "g" # POSIX.1-2001 global header -SOLARIS_XHDTYPE = "X" # Solaris extended header - -USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format -GNU_FORMAT = 1 # GNU tar format -PAX_FORMAT = 2 # POSIX.1-2001 (pax) format -DEFAULT_FORMAT = GNU_FORMAT - -# --------------------------------------------------------- -# tarfile constants -# --------------------------------------------------------- -# File types that tarfile supports: -SUPPORTED_TYPES = ( - REGTYPE, - AREGTYPE, - LNKTYPE, - SYMTYPE, - DIRTYPE, - FIFOTYPE, - CONTTYPE, - CHRTYPE, - BLKTYPE, - GNUTYPE_LONGNAME, - GNUTYPE_LONGLINK, - GNUTYPE_SPARSE, -) - -# File types that will be treated as a regular file. -REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE) - -# File types that are part of the GNU tar format. 
-GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE) - -# Fields from a pax header that override a TarInfo attribute. -PAX_FIELDS = ("path", "linkpath", "size", "mtime", "uid", "gid", "uname", "gname", "SCHILY.xattr.") - -# Fields in a pax header that are numbers, all other fields -# are treated as strings. -PAX_NUMBER_FIELDS = { - "atime": float, - "ctime": float, - "mtime": float, - "uid": int, - "gid": int, - "size": int, - "SCHILY.xattr.": dict, -} - -# --------------------------------------------------------- -# Bits used in the mode field, values in octal. -# --------------------------------------------------------- -S_IFLNK = 0o120000 # symbolic link -S_IFREG = 0o100000 # regular file -S_IFBLK = 0o060000 # block device -S_IFDIR = 0o040000 # directory -S_IFCHR = 0o020000 # character device -S_IFIFO = 0o010000 # fifo - -TSUID = 0o4000 # set UID on execution -TSGID = 0o2000 # set GID on execution -TSVTX = 0o1000 # reserved - -TUREAD = 0o400 # read by owner -TUWRITE = 0o200 # write by owner -TUEXEC = 0o100 # execute/search by owner -TGREAD = 0o040 # read by group -TGWRITE = 0o020 # write by group -TGEXEC = 0o010 # execute/search by group -TOREAD = 0o004 # read by other -TOWRITE = 0o002 # write by other -TOEXEC = 0o001 # execute/search by other - -# --------------------------------------------------------- -# initialization -# --------------------------------------------------------- -ENCODING = sys.getfilesystemencoding() -if ENCODING is None: - ENCODING = sys.getdefaultencoding() - -# --------------------------------------------------------- -# Some useful functions -# --------------------------------------------------------- - - -def stn(s, length): - """ - Convert a python string to a null-terminated string buffer. - """ - return s[:length] + (length - len(s)) * NUL - - -def nts(s): - """ - Convert a null-terminated string field to a python string. - """ - # Use the string up to the first null char. - p = s.find("\0") - if p == -1: - return s - return s[:p] - - -def nti(s): - """ - Convert a number field to a python number. - """ - # There are two possible encodings for a number field, see - # itn() below. - if s[0] != chr(0o200): - try: - n = int(nts(s).strip() or "0", 8) - except ValueError: - raise InvalidHeaderError("invalid header") - else: - n = 0 - for i in range(len(s) - 1): - n <<= 8 - n += ord(s[i + 1]) - return n - - -def itn(n, digits=8, format=DEFAULT_FORMAT): - """ - Convert a python number to a number field. - """ - # POSIX 1003.1-1988 requires numbers to be encoded as a string of - # octal digits followed by a null-byte, this allows values up to - # (8**(digits-1))-1. GNU tar allows storing numbers greater than - # that if necessary. A leading 0200 byte indicates this particular - # encoding, the following digits-1 bytes are a big-endian - # representation. This allows values up to (256**(digits-1))-1. - if 0 <= n < 8 ** (digits - 1): - s = "%0*o" % (digits - 1, n) + NUL - else: - if format != GNU_FORMAT or n >= 256 ** (digits - 1): - raise ValueError("overflow in number field") - - if n < 0: - # XXX We mimic GNU tar's behaviour with negative numbers, - # this could raise OverflowError. - n = struct.unpack("L", struct.pack("l", n))[0] - - s = "" - for i in range(digits - 1): - s = chr(n & 0o377) + s - n >>= 8 - s = chr(0o200) + s - return s - - -def uts(s, encoding, errors): - """ - Convert a unicode object to a string. - """ - if errors == "utf-8": - # An extra error handler similar to the -o invalid=UTF-8 option - # in POSIX.1-2001. 
Replace untranslatable characters with their - # UTF-8 representation. - try: - return s.encode(encoding, "strict") - except UnicodeEncodeError: - x = [] - for c in s: - try: - x.append(c.encode(encoding, "strict")) - except UnicodeEncodeError: - x.append(c.encode("utf8")) - return "".join(x) - else: - return s.encode(encoding, errors) - - -def calc_chksums(buf): - """ - Calculate the checksum for a member's header by summing up all characters except for the chksum - field which is treated as if it was filled with spaces. - - According to the GNU tar sources, some tars (Sun and NeXT) calculate chksum with signed char, - which will be different if there are chars in the buffer with the high bit set. So we calculate - two checksums, unsigned and signed. - """ - unsigned_chksum = 256 + sum( - struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]) - ) - signed_chksum = 256 + sum( - struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]) - ) - return unsigned_chksum, signed_chksum - - -def copyfileobj(src, dst, length=None): - """ - Copy length bytes from fileobj src to fileobj dst. - - If length is None, copy the entire content. - """ - if length == 0: - return - if length is None: - shutil.copyfileobj(src, dst) - return - - BUFSIZE = 16 * 1024 - blocks, remainder = divmod(length, BUFSIZE) - for b in range(blocks): - buf = src.read(BUFSIZE) - if len(buf) < BUFSIZE: - raise IOError("end of file reached") - dst.write(buf) - - if remainder != 0: - buf = src.read(remainder) - if len(buf) < remainder: - raise IOError("end of file reached") - dst.write(buf) - return - - -filemode_table = ( - ( - (S_IFLNK, "l"), - (S_IFREG, "-"), - (S_IFBLK, "b"), - (S_IFDIR, "d"), - (S_IFCHR, "c"), - (S_IFIFO, "p"), - ), - ((TUREAD, "r"),), - ((TUWRITE, "w"),), - ((TUEXEC | TSUID, "s"), (TSUID, "S"), (TUEXEC, "x")), - ((TGREAD, "r"),), - ((TGWRITE, "w"),), - ((TGEXEC | TSGID, "s"), (TSGID, "S"), (TGEXEC, "x")), - ((TOREAD, "r"),), - ((TOWRITE, "w"),), - ((TOEXEC | TSVTX, "t"), (TSVTX, "T"), (TOEXEC, "x")), -) - - -def filemode(mode): - """ - Convert a file's mode to a string of the form. - - -rwxrwxrwx. - Used by TarFile.list() - """ - perm = [] - for table in filemode_table: - for bit, char in table: - if mode & bit == bit: - perm.append(char) - break - else: - perm.append("-") - return "".join(perm) - - -class TarError(Exception): - """ - Base exception. - """ - - pass - - -class ExtractError(TarError): - """ - General exception for extract errors. - """ - - pass - - -class ReadError(TarError): - """ - Exception for unreadable tar archives. - """ - - pass - - -class CompressionError(TarError): - """ - Exception for unavailable compression methods. - """ - - pass - - -class StreamError(TarError): - """ - Exception for unsupported operations on stream-like TarFiles. - """ - - pass - - -class HeaderError(TarError): - """ - Base exception for header errors. - """ - - pass - - -class EmptyHeaderError(HeaderError): - """ - Exception for empty headers. - """ - - pass - - -class TruncatedHeaderError(HeaderError): - """ - Exception for truncated headers. - """ - - pass - - -class EOFHeaderError(HeaderError): - """ - Exception for end of file headers. - """ - - pass - - -class InvalidHeaderError(HeaderError): - """ - Exception for invalid headers. - """ - - pass - - -class SubsequentHeaderError(HeaderError): - """ - Exception for missing and invalid extended headers. 
- """ - - pass - - -# --------------------------- -# internal stream interface -# --------------------------- -class _LowLevelFile: - """ - Low-level file object. - - Supports reading and writing. It is used instead of a regular file object for streaming access. - """ - - def __init__(self, name, mode): - mode = {"r": os.O_RDONLY, "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,}[mode] - if hasattr(os, "O_BINARY"): - mode |= os.O_BINARY - self.fd = os.open(name, mode, 0o666) - - def close(self): - os.close(self.fd) - - def read(self, size): - return os.read(self.fd, size) - - def write(self, s): - os.write(self.fd, s) - - -class _Stream: - """ - Class that serves as an adapter between TarFile and a stream-like object. The stream-like - object only needs to have a read() or write() method and is accessed blockwise. Use of gzip or - bzip2 compression is possible. A stream-like object could be for example: sys.stdin, sys.stdout, - a socket, a tape device etc. - - _Stream is intended to be used only internally. - """ - - def __init__(self, name, mode, comptype, fileobj, bufsize): - """ - Construct a _Stream object. - """ - self._extfileobj = True - if fileobj is None: - fileobj = _LowLevelFile(name, mode) - self._extfileobj = False - - if comptype == "*": - # Enable transparent compression detection for the - # stream interface - fileobj = _StreamProxy(fileobj) - comptype = fileobj.getcomptype() - - self.name = name or "" - self.mode = mode - self.comptype = comptype - self.fileobj = fileobj - self.bufsize = bufsize - self.buf = "" - self.pos = 0 - self.closed = False - - try: - if comptype == "gz": - try: - import zlib - except ImportError: - raise CompressionError("zlib module is not available") - self.zlib = zlib - self.crc = zlib.crc32("") & 0xFFFFFFFF - if mode == "r": - self._init_read_gz() - else: - self._init_write_gz() - - elif comptype == "bz2": - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - if mode == "r": - self.dbuf = "" - self.cmp = bz2.BZ2Decompressor() - else: - self.cmp = bz2.BZ2Compressor() - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - def __del__(self): - if hasattr(self, "closed") and not self.closed: - self.close() - - def _init_write_gz(self): - """ - Initialize for writing with gzip compression. - """ - self.cmp = self.zlib.compressobj( - 9, self.zlib.DEFLATED, -self.zlib.MAX_WBITS, self.zlib.DEF_MEM_LEVEL, 0 - ) - timestamp = struct.pack(" self.bufsize: - self.fileobj.write(self.buf[: self.bufsize]) - self.buf = self.buf[self.bufsize :] - - def close(self): - """ - Close the _Stream object. - - No operation should be done on it afterwards. - """ - if self.closed: - return - - self.closed = True - try: - if self.mode == "w" and self.comptype != "tar": - self.buf += self.cmp.flush() - - if self.mode == "w" and self.buf: - self.fileobj.write(self.buf) - self.buf = "" - if self.comptype == "gz": - # The native zlib crc is an unsigned 32-bit integer, but - # the Python wrapper implicitly casts that to a signed C - # long. So, on a 32-bit box self.crc may "look negative", - # while the same crc on a 64-bit box may "look positive". - # To avoid irksome warnings from the `struct` module, force - # it to look positive on all boxes. 
- self.fileobj.write(struct.pack("= 0: - blocks, remainder = divmod(pos - self.pos, self.bufsize) - for i in range(blocks): - self.read(self.bufsize) - self.read(remainder) - else: - raise StreamError("seeking backwards is not allowed") - return self.pos - - def read(self, size=None): - """ - Return the next size number of bytes from the stream. - - If size is not defined, return all bytes of the stream up to EOF. - """ - if size is None: - t = [] - while True: - buf = self._read(self.bufsize) - if not buf: - break - t.append(buf) - buf = "".join(t) - else: - buf = self._read(size) - self.pos += len(buf) - return buf - - def _read(self, size): - """ - Return size bytes from the stream. - """ - if self.comptype == "tar": - return self.__read(size) - - c = len(self.dbuf) - t = [self.dbuf] - while c < size: - buf = self.__read(self.bufsize) - if not buf: - break - try: - buf = self.cmp.decompress(buf) - except IOError: - raise ReadError("invalid compressed data") - t.append(buf) - c += len(buf) - t = "".join(t) - self.dbuf = t[size:] - return t[:size] - - def __read(self, size): - """ - Return size bytes from stream. - - If internal buffer is empty, read another block from the stream. - """ - c = len(self.buf) - t = [self.buf] - while c < size: - buf = self.fileobj.read(self.bufsize) - if not buf: - break - t.append(buf) - c += len(buf) - t = "".join(t) - self.buf = t[size:] - return t[:size] - - -# class _Stream - - -class _StreamProxy(object): - """ - Small proxy class that enables transparent compression detection for the Stream interface (mode - 'r|*'). - """ - - def __init__(self, fileobj): - self.fileobj = fileobj - self.buf = self.fileobj.read(BLOCKSIZE) - - def read(self, size): - self.read = self.fileobj.read - return self.buf - - def getcomptype(self): - if self.buf.startswith("\037\213\010"): - return "gz" - if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY": - return "bz2" - return "tar" - - def close(self): - self.fileobj.close() - - -# class StreamProxy - - -class _BZ2Proxy(object): - """ - Small proxy class that enables external file object support for "r:bz2" and "w:bz2" modes. - - This is actually a workaround for a limitation in bz2 module's BZ2File class which (unlike - gzip.GzipFile) has no support for a file object argument. 
- """ - - blocksize = 16 * 1024 - - def __init__(self, fileobj, mode): - self.fileobj = fileobj - self.mode = mode - self.name = getattr(self.fileobj, "name", None) - self.init() - - def init(self): - import bz2 - - self.pos = 0 - if self.mode == "r": - self.bz2obj = bz2.BZ2Decompressor() - self.fileobj.seek(0) - self.buf = "" - else: - self.bz2obj = bz2.BZ2Compressor() - - def read(self, size): - b = [self.buf] - x = len(self.buf) - while x < size: - raw = self.fileobj.read(self.blocksize) - if not raw: - break - data = self.bz2obj.decompress(raw) - b.append(data) - x += len(data) - self.buf = "".join(b) - - buf = self.buf[:size] - self.buf = self.buf[size:] - self.pos += len(buf) - return buf - - def seek(self, pos): - if pos < self.pos: - self.init() - self.read(pos - self.pos) - - def tell(self): - return self.pos - - def write(self, data): - self.pos += len(data) - raw = self.bz2obj.compress(data) - self.fileobj.write(raw) - - def close(self): - if self.mode == "w": - raw = self.bz2obj.flush() - self.fileobj.write(raw) - - -# class _BZ2Proxy - -# ------------------------ -# Extraction file object -# ------------------------ -class _FileInFile(object): - """ - A thin wrapper around an existing file object that provides a part of its data as an individual - file object. - """ - - def __init__(self, fileobj, offset, size, sparse=None): - self.fileobj = fileobj - self.offset = offset - self.size = size - self.sparse = sparse - self.position = 0 - - def tell(self): - """ - Return the current file position. - """ - return self.position - - def seek(self, position): - """ - Seek to a position in the file. - """ - self.position = position - - def read(self, size=None): - """ - Read data from the file. - """ - if size is None: - size = self.size - self.position - else: - size = min(size, self.size - self.position) - - if self.sparse is None: - return self.readnormal(size) - else: - return self.readsparse(size) - - def __read(self, size): - buf = self.fileobj.read(size) - if len(buf) != size: - raise ReadError("unexpected end of data") - return buf - - def readnormal(self, size): - """ - Read operation for regular files. - """ - self.fileobj.seek(self.offset + self.position) - self.position += size - return self.__read(size) - - def readsparse(self, size): - """ - Read operation for sparse files. - """ - data = [] - while size > 0: - buf = self.readsparsesection(size) - if not buf: - break - size -= len(buf) - data.append(buf) - return "".join(data) - - def readsparsesection(self, size): - """ - Read a single section of a sparse file. - """ - section = self.sparse.find(self.position) - - if section is None: - return "" - - size = min(size, section.offset + section.size - self.position) - - if isinstance(section, _data): - realpos = section.realpos + self.position - section.offset - self.fileobj.seek(self.offset + realpos) - self.position += size - return self.__read(size) - else: - self.position += size - return NUL * size - - -# class _FileInFile - - -class ExFileObject(object): - """ - File-like object for reading an archive member. - - Is returned by TarFile.extractfile(). - """ - - blocksize = 1024 - - def __init__(self, tarfile, tarinfo): - self.fileobj = _FileInFile( - tarfile.fileobj, tarinfo.offset_data, tarinfo.size, getattr(tarinfo, "sparse", None) - ) - self.name = tarinfo.name - self.mode = "r" - self.closed = False - self.size = tarinfo.size - - self.position = 0 - self.buffer = "" - - def read(self, size=None): - """ - Read at most size bytes from the file. 
- - If size is not present or None, read all data until EOF is reached. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - buf = "" - if self.buffer: - if size is None: - buf = self.buffer - self.buffer = "" - else: - buf = self.buffer[:size] - self.buffer = self.buffer[size:] - - if size is None: - buf += self.fileobj.read() - else: - buf += self.fileobj.read(size - len(buf)) - - self.position += len(buf) - return buf - - def readline(self, size=-1): - """ - Read one entire line from the file. - - If size is present and non-negative, return a string with at most that size, which may be an - incomplete line. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if "\n" in self.buffer: - pos = self.buffer.find("\n") + 1 - else: - buffers = [self.buffer] - while True: - buf = self.fileobj.read(self.blocksize) - buffers.append(buf) - if not buf or "\n" in buf: - self.buffer = "".join(buffers) - pos = self.buffer.find("\n") + 1 - if pos == 0: - # no newline found. - pos = len(self.buffer) - break - - if size != -1: - pos = min(size, pos) - - buf = self.buffer[:pos] - self.buffer = self.buffer[pos:] - self.position += len(buf) - return buf - - def readlines(self): - """ - Return a list with all remaining lines. - """ - result = [] - while True: - line = self.readline() - if not line: - break - result.append(line) - return result - - def tell(self): - """ - Return the current file position. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - return self.position - - def seek(self, pos, whence=os.SEEK_SET): - """ - Seek to a position in the file. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if whence == os.SEEK_SET: - self.position = min(max(pos, 0), self.size) - elif whence == os.SEEK_CUR: - if pos < 0: - self.position = max(self.position + pos, 0) - else: - self.position = min(self.position + pos, self.size) - elif whence == os.SEEK_END: - self.position = max(min(self.size + pos, self.size), 0) - else: - raise ValueError("Invalid argument") - - self.buffer = "" - self.fileobj.seek(self.position) - - def close(self): - """ - Close the file object. - """ - self.closed = True - - def __iter__(self): - """ - Get an iterator over the file's lines. - """ - while True: - line = self.readline() - if not line: - break - yield line - - -# class ExFileObject - -# ------------------ -# Exported Classes -# ------------------ -class TarInfo(object): - """ - Informational class which holds the details about an archive member given by a tar header block. - - TarInfo objects are returned by TarFile.getmember(), TarFile.getmembers() and - TarFile.gettarinfo() and are usually created internally. - """ - - def __init__(self, name=""): - """ - Construct a TarInfo object. - - name is the optional name of the member. - """ - self.name = name # member name - self.mode = 0o644 # file permissions - self.uid = 0 # user id - self.gid = 0 # group id - self.size = 0 # file size - self.mtime = 0 # modification time - self.chksum = 0 # header checksum - self.type = REGTYPE # member type - self.linkname = "" # link name - self.uname = "" # user name - self.gname = "" # group name - self.devmajor = 0 # device major number - self.devminor = 0 # device minor number - - self.offset = 0 # the tar header starts here - self.offset_data = 0 # the file's data starts here - - self.pax_headers = {} # pax header information - - # In pax headers the "name" and "linkname" field are called - # "path" and "linkpath". 
- def _getpath(self): - return self.name - - def _setpath(self, name): - self.name = name - - path = property(_getpath, _setpath) - - def _getlinkpath(self): - return self.linkname - - def _setlinkpath(self, linkname): - self.linkname = linkname - - linkpath = property(_getlinkpath, _setlinkpath) - - def __repr__(self): - return "<%s %r at %#x>" % (self.__class__.__name__, self.name, id(self)) - - def get_info(self, encoding, errors): - """ - Return the TarInfo's attributes as a dictionary. - """ - info = { - "name": self.name, - "mode": self.mode & 0o7777, - "uid": self.uid, - "gid": self.gid, - "size": self.size, - "mtime": self.mtime, - "chksum": self.chksum, - "type": self.type, - "linkname": self.linkname, - "uname": self.uname, - "gname": self.gname, - "devmajor": self.devmajor, - "devminor": self.devminor, - } - - if info["type"] == DIRTYPE and not info["name"].endswith("/"): - info["name"] += "/" - - for key in ("name", "linkname", "uname", "gname"): - if type(info[key]) is str: - info[key] = info[key].encode(encoding, errors) - - return info - - def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"): - """ - Return a tar header as a string of 512 byte blocks. - """ - info = self.get_info(encoding, errors) - - if format == USTAR_FORMAT: - return self.create_ustar_header(info) - elif format == GNU_FORMAT: - return self.create_gnu_header(info) - elif format == PAX_FORMAT: - return self.create_pax_header(info, encoding, errors) - else: - raise ValueError("invalid format") - - def create_ustar_header(self, info): - """ - Return the object as a ustar header block. - """ - info["magic"] = POSIX_MAGIC - - if len(info["linkname"]) > LENGTH_LINK: - raise ValueError("linkname is too long") - - if len(info["name"]) > LENGTH_NAME: - info["prefix"], info["name"] = self._posix_split_name(info["name"]) - - return self._create_header(info, USTAR_FORMAT) - - def create_gnu_header(self, info): - """ - Return the object as a GNU header block sequence. - """ - info["magic"] = GNU_MAGIC - - buf = "" - if len(info["linkname"]) > LENGTH_LINK: - buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK) - - if len(info["name"]) > LENGTH_NAME: - buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME) - - return buf + self._create_header(info, GNU_FORMAT) - - def create_pax_header(self, info, encoding, errors): - """ - Return the object as a ustar header block. - - If it cannot be represented this way, prepend a pax extended header sequence with supplement - information. - """ - info["magic"] = POSIX_MAGIC - pax_headers = self.pax_headers.copy() - - # Test string fields for values that exceed the field length or cannot - # be represented in ASCII encoding. - for name, hname, length in ( - ("name", "path", LENGTH_NAME), - ("linkname", "linkpath", LENGTH_LINK), - ("uname", "uname", 32), - ("gname", "gname", 32), - ): - - if hname in pax_headers: - # The pax header has priority. - continue - - val = info[name].decode(encoding, errors) - - # Try to encode the string as ASCII. - try: - val.encode("ascii") - except UnicodeEncodeError: - pax_headers[hname] = val - continue - - if len(info[name]) > length: - pax_headers[hname] = val - - # Test number fields for values that exceed the field limit or values - # that like to be stored as float. - for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): - if name in pax_headers: - # The pax header has priority. Avoid overflow. 
- info[name] = 0 - continue - - val = info[name] - if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): - pax_headers[name] = str(val) - info[name] = 0 - - # Create a pax extended header if necessary. - if pax_headers: - buf = self._create_pax_generic_header(pax_headers) - else: - buf = "" - - return buf + self._create_header(info, USTAR_FORMAT) - - @classmethod - def create_pax_global_header(cls, pax_headers): - """ - Return the object as a pax global header block sequence. - """ - return cls._create_pax_generic_header(pax_headers, type=XGLTYPE) - - def _posix_split_name(self, name): - """ - Split a name longer than 100 chars into a prefix and a name part. - """ - prefix = name[: LENGTH_PREFIX + 1] - while prefix and prefix[-1] != "/": - prefix = prefix[:-1] - - name = name[len(prefix) :] - prefix = prefix[:-1] - - if not prefix or len(name) > LENGTH_NAME: - raise ValueError("name is too long") - return prefix, name - - @staticmethod - def _create_header(info, format): - """ - Return a header block. - - info is a dictionary with file information, format must be one of the *_FORMAT constants. - """ - parts = [ - stn(info.get("name", ""), 100), - itn(info.get("mode", 0) & 0o7777, 8, format), - itn(info.get("uid", 0), 8, format), - itn(info.get("gid", 0), 8, format), - itn(info.get("size", 0), 12, format), - itn(info.get("mtime", 0), 12, format), - " ", # checksum field - info.get("type", REGTYPE), - stn(info.get("linkname", ""), 100), - stn(info.get("magic", POSIX_MAGIC), 8), - stn(info.get("uname", ""), 32), - stn(info.get("gname", ""), 32), - itn(info.get("devmajor", 0), 8, format), - itn(info.get("devminor", 0), 8, format), - stn(info.get("prefix", ""), 155), - ] - - buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts)) - chksum = calc_chksums(buf[-BLOCKSIZE:])[0] - buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] - return buf - - @staticmethod - def _create_payload(payload): - """ - Return the string payload filled with zero bytes up to the next 512 byte border. - """ - blocks, remainder = divmod(len(payload), BLOCKSIZE) - if remainder > 0: - payload += (BLOCKSIZE - remainder) * NUL - return payload - - @classmethod - def _create_gnu_long_header(cls, name, type): - """ - Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence for name. - """ - name += NUL - - info = {} - info["name"] = "././@LongLink" - info["type"] = type - info["size"] = len(name) - info["magic"] = GNU_MAGIC - - # create extended header + name blocks. - return cls._create_header(info, USTAR_FORMAT) + cls._create_payload(name) - - @classmethod - def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE): - """ - Return a POSIX.1-2001 extended or global header sequence that contains a list of keyword, - value pairs. - - The values must be unicode objects. - """ - records = [] - for keyword, value in pax_headers.items(): - keyword = keyword.encode("utf8") - value = value.encode("utf8") - l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' - n = p = 0 - while True: - n = l + len(str(p)) - if n == p: - break - p = n - records.append("%d %s=%s\n" % (p, keyword, value)) - records = "".join(records) - - # We use a hardcoded "././@PaxHeader" name like star does - # instead of the one that POSIX recommends. - info = {} - info["name"] = "././@PaxHeader" - info["type"] = type - info["size"] = len(records) - info["magic"] = POSIX_MAGIC - - # Create pax header + record blocks. 
- return cls._create_header(info, USTAR_FORMAT) + cls._create_payload(records) - - @classmethod - def frombuf(cls, buf): - """ - Construct a TarInfo object from a 512 byte string buffer. - """ - if len(buf) == 0: - raise EmptyHeaderError("empty header") - if len(buf) != BLOCKSIZE: - raise TruncatedHeaderError("truncated header") - if buf.count(NUL) == BLOCKSIZE: - raise EOFHeaderError("end of file header") - - chksum = nti(buf[148:156]) - if chksum not in calc_chksums(buf): - raise InvalidHeaderError("bad checksum") - - obj = cls() - obj.buf = buf - obj.name = nts(buf[0:100]) - obj.mode = nti(buf[100:108]) - obj.uid = nti(buf[108:116]) - obj.gid = nti(buf[116:124]) - obj.size = nti(buf[124:136]) - obj.mtime = nti(buf[136:148]) - obj.chksum = chksum - obj.type = buf[156:157] - obj.linkname = nts(buf[157:257]) - obj.uname = nts(buf[265:297]) - obj.gname = nts(buf[297:329]) - obj.devmajor = nti(buf[329:337]) - obj.devminor = nti(buf[337:345]) - prefix = nts(buf[345:500]) - - # Old V7 tar format represents a directory as a regular - # file with a trailing slash. - if obj.type == AREGTYPE and obj.name.endswith("/"): - obj.type = DIRTYPE - - # Remove redundant slashes from directories. - if obj.isdir(): - obj.name = obj.name.rstrip("/") - - # Reconstruct a ustar longname. - if prefix and obj.type not in GNU_TYPES: - obj.name = prefix + "/" + obj.name - return obj - - @classmethod - def fromtarfile(cls, tarfile): - """ - Return the next TarInfo object from TarFile object tarfile. - """ - buf = tarfile.fileobj.read(BLOCKSIZE) - obj = cls.frombuf(buf) - obj.offset = tarfile.fileobj.tell() - BLOCKSIZE - return obj._proc_member(tarfile) - - # -------------------------------------------------------------------------- - # The following are methods that are called depending on the type of a - # member. The entry point is _proc_member() which can be overridden in a - # subclass to add custom _proc_*() methods. A _proc_*() method MUST - # implement the following - # operations: - # 1. Set self.offset_data to the position where the data blocks begin, - # if there is data that follows. - # 2. Set tarfile.offset to the position where the next member's header will - # begin. - # 3. Return self or another valid TarInfo object. - def _proc_member(self, tarfile): - """ - Choose the right processing method depending on the type and call it. - """ - if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): - return self._proc_gnulong(tarfile) - elif self.type == GNUTYPE_SPARSE: - return self._proc_sparse(tarfile) - elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): - return self._proc_pax(tarfile) - else: - return self._proc_builtin(tarfile) - - def _proc_builtin(self, tarfile): - """ - Process a builtin type or an unknown type which will be treated as a regular file. - """ - self.offset_data = tarfile.fileobj.tell() - offset = self.offset_data - if self.isreg() or self.type not in SUPPORTED_TYPES: - # Skip the following data blocks. - offset += self._block(self.size) - tarfile.offset = offset - - # Patch the TarInfo object with saved global - # header information. - self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) - - return self - - def _proc_gnulong(self, tarfile): - """ - Process the blocks that hold a GNU longname or longlink member. - """ - buf = tarfile.fileobj.read(self._block(self.size)) - - # Fetch the next header and process it. 
- try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - # Patch the TarInfo object from the next header with - # the longname information. - next.offset = self.offset - if self.type == GNUTYPE_LONGNAME: - next.name = nts(buf) - elif self.type == GNUTYPE_LONGLINK: - next.linkname = nts(buf) - - return next - - def _proc_sparse(self, tarfile): - """ - Process a GNU sparse header plus extra headers. - """ - buf = self.buf - sp = _ringbuffer() - pos = 386 - lastpos = 0 - realpos = 0 - # There are 4 possible sparse structs in the - # first header. - for i in range(4): - try: - offset = nti(buf[pos : pos + 12]) - numbytes = nti(buf[pos + 12 : pos + 24]) - except ValueError: - break - if offset > lastpos: - sp.append(_hole(lastpos, offset - lastpos)) - sp.append(_data(offset, numbytes, realpos)) - realpos += numbytes - lastpos = offset + numbytes - pos += 24 - - isextended = ord(buf[482]) - origsize = nti(buf[483:495]) - - # If the isextended flag is given, - # there are extra headers to process. - while isextended == 1: - buf = tarfile.fileobj.read(BLOCKSIZE) - pos = 0 - for i in range(21): - try: - offset = nti(buf[pos : pos + 12]) - numbytes = nti(buf[pos + 12 : pos + 24]) - except ValueError: - break - if offset > lastpos: - sp.append(_hole(lastpos, offset - lastpos)) - sp.append(_data(offset, numbytes, realpos)) - realpos += numbytes - lastpos = offset + numbytes - pos += 24 - isextended = ord(buf[504]) - - if lastpos < origsize: - sp.append(_hole(lastpos, origsize - lastpos)) - - self.sparse = sp - - self.offset_data = tarfile.fileobj.tell() - tarfile.offset = self.offset_data + self._block(self.size) - self.size = origsize - - return self - - def _proc_pax(self, tarfile): - """ - Process an extended or global header as described in POSIX.1-2001. - """ - # Read the header information. - buf = tarfile.fileobj.read(self._block(self.size)) - - # A pax header stores supplemental information for either - # the following file (extended) or all following files - # (global). - if self.type == XGLTYPE: - pax_headers = tarfile.pax_headers - else: - pax_headers = tarfile.pax_headers.copy() - - # Parse pax header information. A record looks like that: - # "%d %s=%s\n" % (length, keyword, value). length is the size - # of the complete record including the length field itself and - # the newline. keyword and value are both UTF-8 encoded strings. - regex = re.compile(r"(\d+) ([^=]+)=", re.U) - pos = 0 - while True: - match = regex.match(buf, pos) - if not match: - break - - length, keyword = match.groups() - length = int(length) - value = buf[match.end(2) + 1 : match.start(1) + length - 1] - - try: - keyword = keyword.decode("utf8") - except UnicodeDecodeError: - pass - - try: - value = value.decode("utf8") - except UnicodeDecodeError: - pass - - pax_headers[keyword] = value - pos += length - - # Fetch the next header. - try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - if self.type in (XHDTYPE, SOLARIS_XHDTYPE): - # Patch the TarInfo object with the extended header info. - next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) - next.offset = self.offset - - if "size" in pax_headers: - # If the extended header replaces the size field, - # we need to recalculate the offset where the next - # header starts. 
- offset = next.offset_data - if next.isreg() or next.type not in SUPPORTED_TYPES: - offset += next._block(next.size) - tarfile.offset = offset - - return next - - def _apply_pax_info(self, pax_headers, encoding, errors): - """ - Replace fields with supplemental information from a previous pax extended or global header. - """ - for keyword, value in pax_headers.items(): - if keyword not in PAX_FIELDS: - continue - - if keyword == "path": - value = value.rstrip("/") - - if keyword in PAX_NUMBER_FIELDS: - try: - value = PAX_NUMBER_FIELDS[keyword](value) - except ValueError: - value = 0 - else: - value = uts(value, encoding, errors) - - setattr(self, keyword, value) - - self.pax_headers = pax_headers.copy() - - def _block(self, count): - """ - Round up a byte count by BLOCKSIZE and return it, e.g. _block(834) => 1024. - """ - blocks, remainder = divmod(count, BLOCKSIZE) - if remainder: - blocks += 1 - return blocks * BLOCKSIZE - - def isreg(self): - return self.type in REGULAR_TYPES - - def isfile(self): - return self.isreg() - - def isdir(self): - return self.type == DIRTYPE - - def issym(self): - return self.type == SYMTYPE - - def islnk(self): - return self.type == LNKTYPE - - def ischr(self): - return self.type == CHRTYPE - - def isblk(self): - return self.type == BLKTYPE - - def isfifo(self): - return self.type == FIFOTYPE - - def issparse(self): - return self.type == GNUTYPE_SPARSE - - def isdev(self): - return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) - - -# class TarInfo - - -class TarFile(object): - """ - The TarFile Class provides an interface to tar archives. - """ - - debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) - - dereference = False # If true, add content of linked file to the - # tar file, else the link. - - ignore_zeros = False # If true, skips empty or invalid blocks and - # continues processing. - - errorlevel = 1 # If 0, fatal errors only appear in debug - # messages (if debug >= 0). If > 0, errors - # are passed to the caller as exceptions. - - format = DEFAULT_FORMAT # The format to use when creating an archive. - - encoding = ENCODING # Encoding for 8-bit character strings. - - errors = None # Error handler for unicode conversion. - - tarinfo = TarInfo # The default TarInfo class to use. - - fileobject = ExFileObject # The default ExFileObject class to use. - - def __init__( - self, - name=None, - mode="r", - fileobj=None, - format=None, - tarinfo=None, - dereference=None, - ignore_zeros=None, - encoding=None, - errors=None, - pax_headers=None, - debug=None, - errorlevel=None, - ): - """ - Open an (uncompressed) tar archive `name'. - - `mode' is either 'r' to read from an existing archive, 'a' to append data to an existing - file or 'w' to create a new file overwriting an existing one. `mode' defaults to 'r'. If - `fileobj' is given, it is used for reading or writing data. If it can be determined, `mode' - is overridden by `fileobj's mode. `fileobj' is not closed, when TarFile is closed. - """ - modes = {"r": "rb", "a": "r+b", "w": "wb"} - if mode not in modes: - raise ValueError("mode must be 'r', 'a' or 'w'") - self.mode = mode - self._mode = modes[mode] - - if not fileobj: - if self.mode == "a" and not os.path.exists(name): - # Create nonexistent files in append mode. 
- self.mode = "w" - self._mode = "wb" - fileobj = bltn_open(name, self._mode) - self._extfileobj = False - else: - if name is None and hasattr(fileobj, "name"): - name = fileobj.name - if hasattr(fileobj, "mode"): - self._mode = fileobj.mode - self._extfileobj = True - self.name = os.path.abspath(name) if name else None - self.fileobj = fileobj - - # Init attributes. - if format is not None: - self.format = format - if tarinfo is not None: - self.tarinfo = tarinfo - if dereference is not None: - self.dereference = dereference - if ignore_zeros is not None: - self.ignore_zeros = ignore_zeros - if encoding is not None: - self.encoding = encoding - - if errors is not None: - self.errors = errors - elif mode == "r": - self.errors = "utf-8" - else: - self.errors = "strict" - - if pax_headers is not None and self.format == PAX_FORMAT: - self.pax_headers = pax_headers - else: - self.pax_headers = {} - - if debug is not None: - self.debug = debug - if errorlevel is not None: - self.errorlevel = errorlevel - - # Init datastructures. - self.closed = False - self.members = [] # list of members as TarInfo objects - self._loaded = False # flag if all members have been read - self.offset = self.fileobj.tell() - # current position in the archive file - self.inodes = {} # dictionary caching the inodes of - # archive members already added - - try: - if self.mode == "r": - self.firstmember = None - self.firstmember = next(self) - - if self.mode == "a": - # Move to the end of the archive, - # before the first empty block. - while True: - self.fileobj.seek(self.offset) - try: - tarinfo = self.tarinfo.fromtarfile(self) - self.members.append(tarinfo) - except EOFHeaderError: - self.fileobj.seek(self.offset) - break - except HeaderError as e: - raise ReadError(str(e)) - - if self.mode in "aw": - self._loaded = True - - if self.pax_headers: - buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) - self.fileobj.write(buf) - self.offset += len(buf) - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - def _getposix(self): - return self.format == USTAR_FORMAT - - def _setposix(self, value): - import warnings - - warnings.warn("use the format attribute instead", DeprecationWarning, 2) - if value: - self.format = USTAR_FORMAT - else: - self.format = GNU_FORMAT - - posix = property(_getposix, _setposix) - - # -------------------------------------------------------------------------- - # Below are the classmethods which act as alternate constructors to the - # TarFile class. The open() method is the only one that is needed for - # public use; it is the "super"-constructor and is able to select an - # adequate "sub"-constructor for a particular compression using the mapping - # from OPEN_METH. - # - # This concept allows one to subclass TarFile without losing the comfort of - # the super-constructor. A sub-constructor is registered and made available - # by adding it to the mapping in OPEN_METH. - - @classmethod - def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): - """ - Open a tar archive for reading, writing or appending. Return an appropriate TarFile class. 
- - mode: - 'r' or 'r:*' open for reading with transparent compression - 'r:' open for reading exclusively uncompressed - 'r:gz' open for reading with gzip compression - 'r:bz2' open for reading with bzip2 compression - 'a' or 'a:' open for appending, creating the file if necessary - 'w' or 'w:' open for writing without compression - 'w:gz' open for writing with gzip compression - 'w:bz2' open for writing with bzip2 compression - - 'r|*' open a stream of tar blocks with transparent compression - 'r|' open an uncompressed stream of tar blocks for reading - 'r|gz' open a gzip compressed stream of tar blocks - 'r|bz2' open a bzip2 compressed stream of tar blocks - 'w|' open an uncompressed stream for writing - 'w|gz' open a gzip compressed stream for writing - 'w|bz2' open a bzip2 compressed stream for writing - """ - - if not name and not fileobj: - raise ValueError("nothing to open") - - if mode in ("r", "r:*"): - # Find out which *open() is appropriate for opening the file. - for comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - if fileobj is not None: - saved_pos = fileobj.tell() - try: - return func(name, "r", fileobj, **kwargs) - except (ReadError, CompressionError) as e: - if fileobj is not None: - fileobj.seek(saved_pos) - continue - raise ReadError("file could not be opened successfully") - - elif ":" in mode: - filemode, comptype = mode.split(":", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - # Select the *open() function according to - # given compression. - if comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - else: - raise CompressionError("unknown compression type %r" % comptype) - return func(name, filemode, fileobj, **kwargs) - - elif "|" in mode: - filemode, comptype = mode.split("|", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - if filemode not in ("r", "w"): - raise ValueError("mode must be 'r' or 'w'") - - stream = _Stream(name, filemode, comptype, fileobj, bufsize) - try: - t = cls(name, filemode, stream, **kwargs) - except: - stream.close() - raise - t._extfileobj = False - return t - - elif mode in ("a", "w"): - return cls.taropen(name, mode, fileobj, **kwargs) - - raise ValueError("undiscernible mode") - - @classmethod - def taropen(cls, name, mode="r", fileobj=None, **kwargs): - """ - Open uncompressed tar archive name for reading or writing. - """ - if mode not in ("r", "a", "w"): - raise ValueError("mode must be 'r', 'a' or 'w'") - return cls(name, mode, fileobj, **kwargs) - - @classmethod - def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """ - Open gzip compressed tar archive name for reading or writing. - - Appending is not allowed. - """ - if mode not in ("r", "w"): - raise ValueError("mode must be 'r' or 'w'") - - try: - import gzip - - gzip.GzipFile - except (ImportError, AttributeError): - raise CompressionError("gzip module is not available") - - try: - fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj) - except OSError: - if fileobj is not None and mode == "r": - raise ReadError("not a gzip file") - raise - - try: - t = cls.taropen(name, mode, fileobj, **kwargs) - except IOError: - fileobj.close() - if mode == "r": - raise ReadError("not a gzip file") - raise - except: - fileobj.close() - raise - t._extfileobj = False - return t - - @classmethod - def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """ - Open bzip2 compressed tar archive name for reading or writing. - - Appending is not allowed. 
- """ - if mode not in ("r", "w"): - raise ValueError("mode must be 'r' or 'w'.") - - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - - if fileobj is not None: - fileobj = _BZ2Proxy(fileobj, mode) - else: - fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) - - try: - t = cls.taropen(name, mode, fileobj, **kwargs) - except (IOError, EOFError): - fileobj.close() - if mode == "r": - raise ReadError("not a bzip2 file") - raise - except: - fileobj.close() - raise - t._extfileobj = False - return t - - # All *open() methods are registered here. - OPEN_METH = { - "tar": "taropen", # uncompressed tar - "gz": "gzopen", # gzip compressed tar - "bz2": "bz2open", # bzip2 compressed tar - } - - # -------------------------------------------------------------------------- - # The public methods which TarFile provides: - - def close(self): - """ - Close the TarFile. - - In write-mode, two finishing zero blocks are appended to the archive. - """ - if self.closed: - return - - self.closed = True - try: - if self.mode in "aw": - self.fileobj.write(NUL * (BLOCKSIZE * 2)) - self.offset += BLOCKSIZE * 2 - # fill up the end with zero-blocks - # (like option -b20 for tar does) - blocks, remainder = divmod(self.offset, RECORDSIZE) - if remainder > 0: - self.fileobj.write(NUL * (RECORDSIZE - remainder)) - finally: - if not self._extfileobj: - self.fileobj.close() - - def getmember(self, name): - """ - Return a TarInfo object for member `name'. - - If `name' can not be found in the archive, KeyError is raised. If a member occurs more than - once in the archive, its last occurrence is assumed to be the most up-to-date version. - """ - tarinfo = self._getmember(name) - if tarinfo is None: - raise KeyError("filename %r not found" % name) - return tarinfo - - def getmembers(self): - """ - Return the members of the archive as a list of TarInfo objects. - - The list has the same order as the members in the archive. - """ - self._check() - if not self._loaded: # if we want to obtain a list of - self._load() # all members, we first have to - # scan the whole archive. - return self.members - - def getnames(self): - """ - Return the members of the archive as a list of their names. - - It has the same order as the list returned by getmembers(). - """ - return [tarinfo.name for tarinfo in self.getmembers()] - - def gettarinfo(self, name=None, arcname=None, fileobj=None): - """ - Create a TarInfo object for either the file `name' or the file object `fileobj' (using - os.fstat on its file descriptor). - - You can modify some of the TarInfo's attributes before you add it using addfile(). If given, - `arcname' specifies an alternative name for the file in the archive. - """ - self._check("aw") - - # When fileobj is given, replace name by - # fileobj's real name. - if fileobj is not None: - name = fileobj.name - - # Building the name of the member in the archive. - # Backward slashes are converted to forward slashes, - # Absolute paths are turned to relative paths. - if arcname is None: - arcname = name - drv, arcname = os.path.splitdrive(arcname) - arcname = arcname.replace(os.sep, "/") - arcname = arcname.lstrip("/") - - # Now, fill the TarInfo object with - # information specific for the file. - tarinfo = self.tarinfo() - tarinfo.tarfile = self - - # Use os.stat or os.lstat, depending on platform - # and if symlinks shall be resolved. 
- if fileobj is None: - if hasattr(os, "lstat") and not self.dereference: - statres = os.lstat(name) - else: - statres = os.stat(name) - else: - statres = os.fstat(fileobj.fileno()) - linkname = "" - - stmd = statres.st_mode - if stat.S_ISREG(stmd): - inode = (statres.st_ino, statres.st_dev) - if ( - not self.dereference - and statres.st_nlink > 1 - and inode in self.inodes - and arcname != self.inodes[inode] - ): - # Is it a hardlink to an already - # archived file? - type = LNKTYPE - linkname = self.inodes[inode] - else: - # The inode is added only if its valid. - # For win32 it is always 0. - type = REGTYPE - if inode[0]: - self.inodes[inode] = arcname - elif stat.S_ISDIR(stmd): - type = DIRTYPE - elif stat.S_ISFIFO(stmd): - type = FIFOTYPE - elif stat.S_ISLNK(stmd): - type = SYMTYPE - linkname = os.readlink(name) - elif stat.S_ISCHR(stmd): - type = CHRTYPE - elif stat.S_ISBLK(stmd): - type = BLKTYPE - else: - return None - - # Fill the TarInfo object with all - # information we can get. - tarinfo.name = arcname - tarinfo.mode = stmd - tarinfo.uid = statres.st_uid - tarinfo.gid = statres.st_gid - if type == REGTYPE: - tarinfo.size = statres.st_size - else: - tarinfo.size = 0 - tarinfo.mtime = statres.st_mtime - tarinfo.type = type - tarinfo.linkname = linkname - if pwd: - try: - tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] - except KeyError: - pass - if grp: - try: - tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] - except KeyError: - pass - - if type in (CHRTYPE, BLKTYPE): - if hasattr(os, "major") and hasattr(os, "minor"): - tarinfo.devmajor = os.major(statres.st_rdev) - tarinfo.devminor = os.minor(statres.st_rdev) - return tarinfo - - def list(self, verbose=True): - """ - Print a table of contents to sys.stdout. - - If `verbose' is False, only the names of the members are printed. If it is True, an `ls - -l'-like output is produced. - """ - self._check() - - for tarinfo in self: - if verbose: - print(filemode(tarinfo.mode), end=" ") - print( - "%s/%s" % (tarinfo.uname or tarinfo.uid, tarinfo.gname or tarinfo.gid), end=" " - ) - if tarinfo.ischr() or tarinfo.isblk(): - print("%10s" % ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)), end=" ") - else: - print("%10d" % tarinfo.size, end=" ") - print("%d-%02d-%02d %02d:%02d:%02d" % time.localtime(tarinfo.mtime)[:6], end=" ") - - print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=" ") - - if verbose: - if tarinfo.issym(): - print("->", tarinfo.linkname, end=" ") - if tarinfo.islnk(): - print("link to", tarinfo.linkname, end=" ") - print() - - def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): - """ - Add the file `name' to the archive. - - `name' may be any type of file (directory, fifo, symbolic link, etc.). If given, `arcname' - specifies an alternative name for the file in the archive. Directories are added recursively - by default. This can be avoided by setting `recursive' to False. `exclude' is a function - that should return True for each filename to be excluded. `filter' is a function that - expects a TarInfo object argument and returns the changed TarInfo object, if it returns None - the TarInfo object will be excluded from the archive. - """ - self._check("aw") - - if arcname is None: - arcname = name - - # Exclude pathnames. - if exclude is not None: - import warnings - - warnings.warn("use the filter argument instead", DeprecationWarning, 2) - if exclude(name): - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Skip if somebody tries to archive the archive... 
- if self.name is not None and os.path.abspath(name) == self.name: - self._dbg(2, "tarfile: Skipped %r" % name) - return - - self._dbg(1, name) - - # Create a TarInfo object from the file. - tarinfo = self.gettarinfo(name, arcname) - - if tarinfo is None: - self._dbg(1, "tarfile: Unsupported type %r" % name) - return - - # Change or exclude the TarInfo object. - if filter is not None: - tarinfo = list(filter(tarinfo)) - if tarinfo is None: - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Append the tar header and data to the archive. - if tarinfo.isreg(): - with bltn_open(name, "rb") as f: - self.addfile(tarinfo, f) - - elif tarinfo.isdir(): - self.addfile(tarinfo) - if recursive: - for f in os.listdir(name): - self.add( - os.path.join(name, f), os.path.join(arcname, f), recursive, exclude, filter - ) - - else: - self.addfile(tarinfo) - - def addfile(self, tarinfo, fileobj=None): - """ - Add the TarInfo object `tarinfo' to the archive. - - If `fileobj' is given, tarinfo.size bytes are read from it and added to the archive. You can - create TarInfo objects using gettarinfo(). On Windows platforms, `fileobj' should always be - opened with mode 'rb' to avoid irritation about the file size. - """ - self._check("aw") - - tarinfo = copy.copy(tarinfo) - - buf = tarinfo.tobuf(self.format, self.encoding, self.errors) - self.fileobj.write(buf) - self.offset += len(buf) - - # If there's data to follow, append it. - if fileobj is not None: - copyfileobj(fileobj, self.fileobj, tarinfo.size) - blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) - if remainder > 0: - self.fileobj.write(NUL * (BLOCKSIZE - remainder)) - blocks += 1 - self.offset += blocks * BLOCKSIZE - - self.members.append(tarinfo) - - def extractall(self, path=".", members=None): - """ - Extract all members from the archive to the current working directory and set owner, - modification time and permissions on directories afterwards. - - `path' specifies a different directory to extract to. `members' is optional and must be a - subset of the list returned by getmembers(). - """ - directories = [] - - if members is None: - members = self - - for tarinfo in members: - if tarinfo.isdir(): - # Extract directories with a safe mode. - directories.append(tarinfo) - tarinfo = copy.copy(tarinfo) - tarinfo.mode = 0o700 - self.extract(tarinfo, path) - - # Reverse sort directories. - directories.sort(key=operator.attrgetter("name")) - directories.reverse() - - # Set correct owner, mtime and filemode on directories. - for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) - try: - self.chown(tarinfo, dirpath) - self.utime(tarinfo, dirpath) - self.chmod(tarinfo, dirpath) - except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extract(self, member, path=""): - """ - Extract a member from the archive to the current working directory, using its full name. - - Its file information is extracted as accurately as possible. `member' may be a filename or a - TarInfo object. You can specify a different directory using `path'. - """ - self._check("r") - - if isinstance(member, str): - tarinfo = self.getmember(member) - else: - tarinfo = member - - # Prepare the link target for makelink(). 
- if tarinfo.islnk(): - tarinfo._link_target = os.path.join(path, tarinfo.linkname) - - try: - self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) - except EnvironmentError as e: - if self.errorlevel > 0: - raise - else: - if e.filename is None: - self._dbg(1, "tarfile: %s" % e.strerror) - else: - self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) - except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extractfile(self, member): - """ - Extract a member from the archive as a file object. - - `member' may be - a filename or a TarInfo object. If `member' is a regular file, a - file-like object is returned. If `member' is a link, a file-like - object is constructed from the link's target. If `member' is none of - the above, None is returned. - The file-like object is read-only and provides the following - methods: read(), readline(), readlines(), seek() and tell() - """ - self._check("r") - - if isinstance(member, str): - tarinfo = self.getmember(member) - else: - tarinfo = member - - if tarinfo.isreg(): - return self.fileobject(self, tarinfo) - - elif tarinfo.type not in SUPPORTED_TYPES: - # If a member's type is unknown, it is treated as a - # regular file. - return self.fileobject(self, tarinfo) - - elif tarinfo.islnk() or tarinfo.issym(): - if isinstance(self.fileobj, _Stream): - # A small but ugly workaround for the case that someone tries - # to extract a (sym)link as a file-object from a non-seekable - # stream of tar blocks. - raise StreamError("cannot extract (sym)link as file object") - else: - # A (sym)link's file object is its target's file object. - return self.extractfile(self._find_link_target(tarinfo)) - else: - # If there's no data associated with the member (directory, chrdev, - # blkdev, etc.), return None instead of a file object. - return None - - def _extract_member(self, tarinfo, targetpath): - """ - Extract the TarInfo object tarinfo to a physical file called targetpath. - """ - # Fetch the TarInfo object for the given name - # and build the destination pathname, replacing - # forward slashes to platform specific separators. - targetpath = targetpath.rstrip("/") - targetpath = targetpath.replace("/", os.sep) - - # Create all upper directories. - upperdirs = os.path.dirname(targetpath) - if upperdirs and not os.path.exists(upperdirs): - # Create directories that are not part of the archive with - # default permissions. - os.makedirs(upperdirs) - - if tarinfo.islnk() or tarinfo.issym(): - self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) - else: - self._dbg(1, tarinfo.name) - - if tarinfo.isreg(): - self.makefile(tarinfo, targetpath) - elif tarinfo.isdir(): - self.makedir(tarinfo, targetpath) - elif tarinfo.isfifo(): - self.makefifo(tarinfo, targetpath) - elif tarinfo.ischr() or tarinfo.isblk(): - self.makedev(tarinfo, targetpath) - elif tarinfo.islnk() or tarinfo.issym(): - self.makelink(tarinfo, targetpath) - elif tarinfo.type not in SUPPORTED_TYPES: - self.makeunknown(tarinfo, targetpath) - else: - self.makefile(tarinfo, targetpath) - - self.chown(tarinfo, targetpath) - if not tarinfo.issym(): - self.chmod(tarinfo, targetpath) - self.utime(tarinfo, targetpath) - - # -------------------------------------------------------------------------- - # Below are the different file methods. They are called via - # _extract_member() when extract() is called. They can be replaced in a - # subclass to implement other functionality. 
- - def makedir(self, tarinfo, targetpath): - """ - Make a directory called targetpath. - """ - try: - # Use a safe mode for the directory, the real mode is set - # later in _extract_member(). - os.mkdir(targetpath, 0o700) - except EnvironmentError as e: - if e.errno != errno.EEXIST: - raise - - def makefile(self, tarinfo, targetpath): - """ - Make a file called targetpath. - """ - source = self.extractfile(tarinfo) - try: - with bltn_open(targetpath, "wb") as target: - copyfileobj(source, target) - finally: - source.close() - - def makeunknown(self, tarinfo, targetpath): - """ - Make a file from a TarInfo object with an unknown type at targetpath. - """ - self.makefile(tarinfo, targetpath) - self._dbg(1, "tarfile: Unknown file type %r, " "extracted as regular file." % tarinfo.type) - - def makefifo(self, tarinfo, targetpath): - """ - Make a fifo called targetpath. - """ - if hasattr(os, "mkfifo"): - os.mkfifo(targetpath) - else: - raise ExtractError("fifo not supported by system") - - def makedev(self, tarinfo, targetpath): - """ - Make a character or block device called targetpath. - """ - if not hasattr(os, "mknod") or not hasattr(os, "makedev"): - raise ExtractError("special devices not supported by system") - - mode = tarinfo.mode - if tarinfo.isblk(): - mode |= stat.S_IFBLK - else: - mode |= stat.S_IFCHR - - os.mknod(targetpath, mode, os.makedev(tarinfo.devmajor, tarinfo.devminor)) - - def makelink(self, tarinfo, targetpath): - """ - Make a (symbolic) link called targetpath. - - If it cannot be created (platform limitation), we try to make a copy of the referenced file - instead of a link. - """ - if hasattr(os, "symlink") and hasattr(os, "link"): - # For systems that support symbolic and hard links. - if tarinfo.issym(): - if os.path.lexists(targetpath): - os.unlink(targetpath) - os.symlink(tarinfo.linkname, targetpath) - else: - # See extract(). - if os.path.exists(tarinfo._link_target): - if os.path.lexists(targetpath): - os.unlink(targetpath) - os.link(tarinfo._link_target, targetpath) - else: - self._extract_member(self._find_link_target(tarinfo), targetpath) - else: - try: - self._extract_member(self._find_link_target(tarinfo), targetpath) - except KeyError: - raise ExtractError("unable to resolve link inside archive") - - def chown(self, tarinfo, targetpath): - """ - Set owner of targetpath according to tarinfo. - """ - if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: - # We have to be root to do so. - try: - g = grp.getgrnam(tarinfo.gname)[2] - except KeyError: - g = tarinfo.gid - try: - u = pwd.getpwnam(tarinfo.uname)[2] - except KeyError: - u = tarinfo.uid - try: - if tarinfo.issym() and hasattr(os, "lchown"): - os.lchown(targetpath, u, g) - else: - if sys.platform != "os2emx": - os.chown(targetpath, u, g) - except EnvironmentError as e: - raise ExtractError("could not change owner") - - def chmod(self, tarinfo, targetpath): - """ - Set file permissions of targetpath according to tarinfo. - """ - if hasattr(os, "chmod"): - try: - os.chmod(targetpath, tarinfo.mode) - except EnvironmentError as e: - raise ExtractError("could not change mode") - - def utime(self, tarinfo, targetpath): - """ - Set modification time of targetpath according to tarinfo. 
- """ - if not hasattr(os, "utime"): - return - try: - os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) - except EnvironmentError as e: - raise ExtractError("could not change modification time") - - # -------------------------------------------------------------------------- - def __next__(self): - """ - Return the next member of the archive as a TarInfo object, when TarFile is opened for - reading. - - Return None if there is no more available. - """ - self._check("ra") - if self.firstmember is not None: - m = self.firstmember - self.firstmember = None - return m - - # Advance the file pointer. - if self.offset != self.fileobj.tell(): - self.fileobj.seek(self.offset - 1) - if not self.fileobj.read(1): - raise ReadError("unexpected end of data") - - # Read the next block. - tarinfo = None - while True: - try: - tarinfo = self.tarinfo.fromtarfile(self) - except EOFHeaderError as e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - except InvalidHeaderError as e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - elif self.offset == 0: - raise ReadError(str(e)) - except EmptyHeaderError: - if self.offset == 0: - raise ReadError("empty file") - except TruncatedHeaderError as e: - if self.offset == 0: - raise ReadError(str(e)) - except SubsequentHeaderError as e: - raise ReadError(str(e)) - break - - if tarinfo is not None: - self.members.append(tarinfo) - else: - self._loaded = True - - return tarinfo - - # -------------------------------------------------------------------------- - # Little helper methods: - - def _getmember(self, name, tarinfo=None, normalize=False): - """ - Find an archive member by name from bottom to top. - - If tarinfo is given, it is used as the starting point. - """ - # Ensure that all members have been loaded. - members = self.getmembers() - - # Limit the member search list up to tarinfo. - if tarinfo is not None: - members = members[: members.index(tarinfo)] - - if normalize: - name = os.path.normpath(name) - - for member in reversed(members): - if normalize: - member_name = os.path.normpath(member.name) - else: - member_name = member.name - - if name == member_name: - return member - - def _load(self): - """ - Read through the entire archive file and look for readable members. - """ - while True: - tarinfo = next(self) - if tarinfo is None: - break - self._loaded = True - - def _check(self, mode=None): - """ - Check if TarFile is still open, and if the operation's mode corresponds to TarFile's mode. - """ - if self.closed: - raise IOError("%s is closed" % self.__class__.__name__) - if mode is not None and self.mode not in mode: - raise IOError("bad operation for mode %r" % self.mode) - - def _find_link_target(self, tarinfo): - """ - Find the target member of a symlink or hardlink member in the archive. - """ - if tarinfo.issym(): - # Always search the entire archive. - linkname = "/".join( - [_f for _f in (os.path.dirname(tarinfo.name), tarinfo.linkname) if _f] - ) - limit = None - else: - # Search the archive before the link, because a hard link is - # just a reference to an already archived file. - linkname = tarinfo.linkname - limit = tarinfo - - member = self._getmember(linkname, tarinfo=limit, normalize=True) - if member is None: - raise KeyError("linkname %r not found" % linkname) - return member - - def __iter__(self): - """ - Provide an iterator object. 
- """ - if self._loaded: - return iter(self.members) - else: - return TarIter(self) - - def _dbg(self, level, msg): - """ - Write debugging output to sys.stderr. - """ - if level <= self.debug: - print(msg, file=sys.stderr) - - def __enter__(self): - self._check() - return self - - def __exit__(self, type, value, traceback): - if type is None: - self.close() - else: - # An exception occurred. We must not call close() because - # it would try to write end-of-archive blocks and padding. - if not self._extfileobj: - self.fileobj.close() - self.closed = True - - -# class TarFile - - -class TarIter: - """ - Iterator Class. - - for tarinfo in TarFile(...): suite... - """ - - def __init__(self, tarfile): - """ - Construct a TarIter object. - """ - self.tarfile = tarfile - self.index = 0 - - def __iter__(self): - """ - Return iterator object. - """ - return self - - def __next__(self): - """ - Return the next item using TarFile's next() method. - - When all members have been read, set TarFile as _loaded. - """ - # Fix for SF #1100429: Under rare circumstances it can - # happen that getmembers() is called during iteration, - # which will cause TarIter to stop prematurely. - - if self.index == 0 and self.tarfile.firstmember is not None: - tarinfo = next(self.tarfile) - elif self.index < len(self.tarfile.members): - tarinfo = self.tarfile.members[self.index] - elif not self.tarfile._loaded: - tarinfo = next(self.tarfile) - if not tarinfo: - self.tarfile._loaded = True - raise StopIteration - else: - raise StopIteration - self.index += 1 - return tarinfo - - -# Helper classes for sparse file support -class _section: - """ - Base class for _data and _hole. - """ - - def __init__(self, offset, size): - self.offset = offset - self.size = size - - def __contains__(self, offset): - return self.offset <= offset < self.offset + self.size - - -class _data(_section): - """ - Represent a data section in a sparse file. - """ - - def __init__(self, offset, size, realpos): - _section.__init__(self, offset, size) - self.realpos = realpos - - -class _hole(_section): - """ - Represent a hole section in a sparse file. - """ - - pass - - -class _ringbuffer(list): - """ - Ringbuffer class which increases performance over a regular list. - """ - - def __init__(self): - self.idx = 0 - - def find(self, offset): - idx = self.idx - while True: - item = self[idx] - if offset in item: - break - idx += 1 - if idx == len(self): - idx = 0 - if idx == self.idx: - # End of File - return None - self.idx = idx - return item - - -# --------------------------------------------- -# zipfile compatible TarFile class -# --------------------------------------------- -TAR_PLAIN = 0 # zipfile.ZIP_STORED -TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED - - -class TarFileCompat: - """ - TarFile class compatible with standard module zipfile's ZipFile class. 
- """ - - def __init__(self, file, mode="r", compression=TAR_PLAIN): - from warnings import warnpy3k - - warnpy3k("the TarFileCompat class has been removed in Python 3.0", stacklevel=2) - if compression == TAR_PLAIN: - self.tarfile = TarFile.taropen(file, mode) - elif compression == TAR_GZIPPED: - self.tarfile = TarFile.gzopen(file, mode) - else: - raise ValueError("unknown compression constant") - if mode[0:1] == "r": - members = self.tarfile.getmembers() - for m in members: - m.filename = m.name - m.file_size = m.size - m.date_time = time.gmtime(m.mtime)[:6] - - def namelist(self): - return [m.name for m in self.infolist()] - - def infolist(self): - return [m for m in self.tarfile.getmembers() if m.type in REGULAR_TYPES] - - def printdir(self): - self.tarfile.list() - - def testzip(self): - return - - def getinfo(self, name): - return self.tarfile.getmember(name) - - def read(self, name): - return self.tarfile.extractfile(self.tarfile.getmember(name)).read() - - def write(self, filename, arcname=None, compress_type=None): - self.tarfile.add(filename, arcname) - - def writestr(self, zinfo, bytes): - try: - from io import StringIO - except ImportError: - from io import StringIO - import calendar - - tinfo = TarInfo(zinfo.filename) - tinfo.size = len(bytes) - tinfo.mtime = calendar.timegm(zinfo.date_time) - self.tarfile.addfile(tinfo, StringIO(bytes)) - - def close(self): - self.tarfile.close() - - -# class TarFileCompat - -# -------------------- -# exported functions -# -------------------- -def is_tarfile(name): - """ - Return True if name points to a tar archive that we are able to handle, else return False. - """ - try: - t = open(name) - t.close() - return True - except TarError: - return False - - -open = TarFile.open diff --git a/util/verifybackfill.py b/util/verifybackfill.py deleted file mode 100644 index 04bc4bf98..000000000 --- a/util/verifybackfill.py +++ /dev/null @@ -1,83 +0,0 @@ -import logging -import sys - -from app import app -from data import model -from data.database import ( - RepositoryTag, - Repository, - TagToRepositoryTag, - TagManifest, - ManifestLegacyImage, -) - -logger = logging.getLogger(__name__) - - -def _vs(first, second): - return "%s vs %s" % (first, second) - - -def verify_backfill(namespace_name): - logger.info("Checking namespace %s", namespace_name) - namespace_user = model.user.get_namespace_user(namespace_name) - assert namespace_user - - repo_tags = ( - RepositoryTag.select() - .join(Repository) - .where(Repository.namespace_user == namespace_user) - .where(RepositoryTag.hidden == False) - ) - - repo_tags = list(repo_tags) - logger.info("Found %s tags", len(repo_tags)) - - for index, repo_tag in enumerate(repo_tags): - logger.info( - "Checking tag %s under repository %s (%s/%s)", - repo_tag.name, - repo_tag.repository.name, - index + 1, - len(repo_tags), - ) - - tag = TagToRepositoryTag.get(repository_tag=repo_tag).tag - assert not tag.hidden - assert tag.repository == repo_tag.repository - assert tag.name == repo_tag.name, _vs(tag.name, repo_tag.name) - assert tag.repository == repo_tag.repository, _vs(tag.repository_id, repo_tag.repository_id) - assert tag.reversion == repo_tag.reversion, _vs(tag.reversion, repo_tag.reversion) - - start_check = int(tag.lifetime_start_ms // 1000) == repo_tag.lifetime_start_ts - assert start_check, _vs(tag.lifetime_start_ms, repo_tag.lifetime_start_ts) - if repo_tag.lifetime_end_ts is not None: - end_check = int(tag.lifetime_end_ms // 1000) == repo_tag.lifetime_end_ts - assert end_check, _vs(tag.lifetime_end_ms, 
repo_tag.lifetime_end_ts) - else: - assert tag.lifetime_end_ms is None - - try: - tag_manifest = tag.manifest - repo_tag_manifest = TagManifest.get(tag=repo_tag) - - digest_check = tag_manifest.digest == repo_tag_manifest.digest - assert digest_check, _vs(tag_manifest.digest, repo_tag_manifest.digest) - - bytes_check = tag_manifest.manifest_bytes == repo_tag_manifest.json_data - assert bytes_check, _vs(tag_manifest.manifest_bytes, repo_tag_manifest.json_data) - except TagManifest.DoesNotExist: - logger.info("No tag manifest found for repository tag %s", repo_tag.id) - - mli = ManifestLegacyImage.get(manifest=tag_manifest) - assert mli.repository == repo_tag.repository - - manifest_legacy_image = mli.image - assert manifest_legacy_image == repo_tag.image, _vs( - manifest_legacy_image.id, repo_tag.image_id - ) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - verify_backfill(sys.argv[1]) diff --git a/verbs.py b/verbs.py deleted file mode 100644 index 59c9b4c29..000000000 --- a/verbs.py +++ /dev/null @@ -1,7 +0,0 @@ -# NOTE: We don't gevent patch here because `verbs` uses `sync` workers. - -from app import app as application -from endpoints.verbs import verbs - - -application.register_blueprint(verbs, url_prefix="/c1") diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py new file mode 100644 index 000000000..a576622cd --- /dev/null +++ b/workers/manifestbackfillworker.py @@ -0,0 +1,101 @@ +import logging +import time + +from peewee import fn + +import features + +from app import app +from data.database import Manifest +from image.shared.schemas import parse_manifest_from_bytes, ManifestException +from workers.worker import Worker +from util.migrate.allocator import yield_random_entries +from util.bytes import Bytes +from util.log import logfile_path + + +logger = logging.getLogger(__name__) + +WORKER_FREQUENCY = app.config.get("MANIFEST_BACKFILL_WORKER_FREQUENCY", 60 * 60) + + +class ManifestBackfillWorker(Worker): + """ + Worker which backfills the newly added layers compressed size and config media type + fields onto Manifest.
+ """ + + def __init__(self): + super(ManifestBackfillWorker, self).__init__() + self.add_operation(self._backfill_manifests, WORKER_FREQUENCY) + + def _backfill_manifests(self): + try: + Manifest.select().where(Manifest.layers_compressed_size >> None).get() + except Manifest.DoesNotExist: + logger.debug("Manifest backfill worker has completed; skipping") + return False + + iterator = yield_random_entries( + lambda: Manifest.select().where(Manifest.layers_compressed_size >> None), + Manifest.id, + 250, + Manifest.select(fn.Max(Manifest.id)).scalar(), + 1, + ) + + for manifest_row, abt, _ in iterator: + if manifest_row.layers_compressed_size is not None: + logger.debug("Another worker preempted this worker") + abt.set() + continue + + logger.debug("Setting layers compressed size for manifest %s", manifest_row.id) + layers_compressed_size = -1 + config_media_type = None + manifest_bytes = Bytes.for_string_or_unicode(manifest_row.manifest_bytes) + + try: + parsed = parse_manifest_from_bytes( + manifest_bytes, manifest_row.media_type.name, validate=False + ) + layers_compressed_size = parsed.layers_compressed_size + if layers_compressed_size is None: + layers_compressed_size = 0 + + config_media_type = parsed.config_media_type or None + except ManifestException as me: + logger.warning( + "Got exception when trying to parse manifest %s: %s", manifest_row.id, me + ) + + assert layers_compressed_size is not None + updated = ( + Manifest.update( + layers_compressed_size=layers_compressed_size, + config_media_type=config_media_type, + ) + .where(Manifest.id == manifest_row.id, Manifest.layers_compressed_size >> None) + .execute() + ) + if updated != 1: + logger.debug("Another worker preempted this worker") + abt.set() + continue + + return True + + +def main(): + logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False) + + if not features.MANIFEST_SIZE_BACKFILL: + logger.debug("Manifest backfill worker not enabled; skipping") + while True: + time.sleep(100000) + + worker = ManifestBackfillWorker() + worker.start() + + +if __name__ == "__main__": + main() diff --git a/workers/notificationworker/models_pre_oci.py b/workers/notificationworker/models_pre_oci.py index 15fb912da..1db25e92f 100644 --- a/workers/notificationworker/models_pre_oci.py +++ b/workers/notificationworker/models_pre_oci.py @@ -1,6 +1,7 @@ import json from data import model +from data.database import RepositoryNotification from workers.notificationworker.models_interface import ( NotificationWorkerDataInterface, Notification, @@ -14,8 +15,8 @@ def notification(notification_row): """ return Notification( uuid=notification_row.uuid, - event_name=notification_row.event.name, - method_name=notification_row.method.name, + event_name=RepositoryNotification.event.get_name(notification_row.event_id), + method_name=RepositoryNotification.method.get_name(notification_row.method_id), event_config_dict=json.loads(notification_row.event_config_json or "{}"), method_config_dict=json.loads(notification_row.config_json or "{}"), repository=Repository( diff --git a/workers/repomirrorworker/test/test_repomirrorworker.py b/workers/repomirrorworker/test/test_repomirrorworker.py index 5897101c0..31996a9e9 100644 --- a/workers/repomirrorworker/test/test_repomirrorworker.py +++ b/workers/repomirrorworker/test/test_repomirrorworker.py @@ -56,14 +56,18 @@ def _create_tag(repo, name): ) upload.upload_chunk(app_config, BytesIO(config_json.encode("utf-8"))) blob = upload.commit_to_blob(app_config) + assert blob + builder = 
DockerSchema2ManifestBuilder() builder.set_config_digest(blob.digest, blob.compressed_size) builder.add_layer("sha256:abcd", 1234, urls=["http://hello/world"]) manifest = builder.build() manifest, tag = registry_model.create_manifest_and_retarget_tag( - repo_ref, manifest, name, storage + repo_ref, manifest, name, storage, raise_on_error=True ) + assert tag + assert tag.name == name @disable_existing_mirrors diff --git a/workers/security_notification_worker.py b/workers/security_notification_worker.py deleted file mode 100644 index 27d1b1cb1..000000000 --- a/workers/security_notification_worker.py +++ /dev/null @@ -1,106 +0,0 @@ -import logging -import time -import json - -import features - -from app import secscan_notification_queue -from data.secscan_model import secscan_model -from workers.queueworker import QueueWorker, JobException -from util.secscan.notifier import SecurityNotificationHandler, ProcessNotificationPageResult - - -logger = logging.getLogger(__name__) - - -_PROCESSING_SECONDS = 60 * 60 # 1 hour -_LAYER_LIMIT = 1000 # The number of layers to request on each page. - - -class SecurityNotificationWorker(QueueWorker): - """ NOTE: This worker is legacy code and should be removed after we've fully moved to Clair V4 - API. - """ - - def process_queue_item(self, data): - self.perform_notification_work(data) - - def perform_notification_work(self, data, layer_limit=_LAYER_LIMIT): - """ - Performs the work for handling a security notification as referenced by the given data - object. - - Returns True on successful handling, False on non-retryable failure and raises a - JobException on retryable failure. - """ - secscan_api = secscan_model.legacy_api_handler - - notification_name = data["Name"] - current_page = data.get("page", None) - handler = SecurityNotificationHandler(secscan_api, layer_limit) - - while True: - # Retrieve the current page of notification data from the security scanner. - (response_data, should_retry) = secscan_api.get_notification( - notification_name, layer_limit=layer_limit, page=current_page - ) - - # If no response, something went wrong. - if response_data is None: - if should_retry: - raise JobException() - else: - # Remove the job from the API. - logger.error("Failed to handle security notification %s", notification_name) - secscan_api.mark_notification_read(notification_name) - - # Return to mark the job as "complete", as we'll never be able to finish it. - return False - - # Extend processing on the queue item so it doesn't expire while we're working. - self.extend_processing(_PROCESSING_SECONDS, json.dumps(data)) - - # Process the notification data. - notification_data = response_data["Notification"] - result = handler.process_notification_page_data(notification_data) - - # Possible states after processing: failed to process, finished processing entirely - # or finished processing the page. - if result == ProcessNotificationPageResult.FAILED: - # Something went wrong. - raise JobException - - if result == ProcessNotificationPageResult.FINISHED_PROCESSING: - # Mark the notification as read. - if not secscan_api.mark_notification_read(notification_name): - # Return to mark the job as "complete", as we'll never be able to finish it. - logger.error("Failed to mark notification %s as read", notification_name) - return False - - # Send the generated Quay notifications. - handler.send_notifications() - return True - - if result == ProcessNotificationPageResult.FINISHED_PAGE: - # Continue onto the next page. 
- current_page = notification_data["NextPage"] - continue - - -if __name__ == "__main__": - if ( - not features.SECURITY_SCANNER - or not features.SECURITY_NOTIFICATIONS - or not secscan_model.legacy_api_handler - ): - logger.debug("Security scanner disabled; skipping SecurityNotificationWorker") - while True: - time.sleep(100000) - - worker = SecurityNotificationWorker( - secscan_notification_queue, - poll_period_seconds=30, - reservation_seconds=30, - retry_after_seconds=30, - ) - worker.start() diff --git a/workers/test/test_manifestbackfillworker.py b/workers/test/test_manifestbackfillworker.py new file mode 100644 index 000000000..024ce9e55 --- /dev/null +++ b/workers/test/test_manifestbackfillworker.py @@ -0,0 +1,34 @@ +import pytest + +from data import model, database +from image.shared.schemas import parse_manifest_from_bytes, ManifestException +from workers.manifestbackfillworker import ManifestBackfillWorker +from util.bytes import Bytes +from test.fixtures import * + + +def test_basic(initialized_db): + worker = ManifestBackfillWorker() + + # Try with none to backfill. + assert not worker._backfill_manifests() + + # Delete the sizes on some manifest rows. + database.Manifest.update(layers_compressed_size=None).execute() + + # Try the backfill now. + assert worker._backfill_manifests() + + # Ensure the rows were updated and correct. + for manifest_row in database.Manifest.select(): + assert manifest_row.layers_compressed_size is not None + + manifest_bytes = Bytes.for_string_or_unicode(manifest_row.manifest_bytes) + parsed = parse_manifest_from_bytes( + manifest_bytes, manifest_row.media_type.name, validate=False + ) + layers_compressed_size = parsed.layers_compressed_size or 0 + assert manifest_row.layers_compressed_size == layers_compressed_size + assert manifest_row.config_media_type == parsed.config_media_type + + assert not worker._backfill_manifests()