From 0e628b1569aa168434d154f4ed147dda484fcebd Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Tue, 28 Jul 2020 13:03:10 -0400
Subject: [PATCH] Deprecate Image rows and move to in-memory synthesized legacy images [Python 3] (#442)

* Change verbs to use a DerivedStorageForManifest table instead of DerivedStorageForImage

This allows us to deprecate the DerivedStorageForImage table.

Fixes https://issues.redhat.com/browse/PROJQUAY-519

* Change uploaded blob tracking to use its own table and deprecate RepositoryTag

* Start recording the compressed layers size and config media type on the manifest row in the database

NOTE: This change includes a database migration which will *lock* the manifest table

* Change tag API to return the layers size from the manifest

* Remove unused code

* Add new config_media_type field to OCI types

* Fix secscan V2 test for us no longer writing temp images

* Remove unused uploading field

* Switch registry model to use synthetic legacy images

Legacy images are now (with the exception of the V2 security model) read from the
*manifest* and synthesized in memory. The legacy image IDs are generated in real time
using the hashids library.

This change also further deprecates a number of our Image APIs, reducing them to only
returning the image IDs and emptying out the remaining metadata (to avoid requiring us
to load the manifest's information from storage).

This has been tested against our full clients test suite with success.

* Add a backfill worker for manifest layers compressed sizes

* Change image tracks into manifest tracks now that we no longer have manifest-less tags

* Add back in the missing method

* Add missing joins to reduce extra queries

* Remove unnecessary join when looking up legacy images

* Remove extra hidden filter on tag queries

* Further DB improvements

* Delete all Verbs, as they were deprecated

* Add back missing parameter in manifest data type

* Fix join to return None for the robot if not defined on mirror config

* Switch to using secscan_v4_model for all indexing and remove most of secscan_v2_model code

* Add a missing join

* Remove files accidentally re-added due to rebase

* Add back hashids lib

* Rebase fixes

* Fix broken test

* Remove unused GPG signer now that ACI conversion is removed

* Remove duplicated repomirrorworker

* Remove unused notification code for secscan.
We'll re-add it once Clair V4 security notifications are ready to go * Fix formatting * Stop writing Image rows when creating manifests * Stop writing empty layer blobs for manifests As these blobs are shared, we don't need to write ManifestBlob rows for them * Remove further unused code * Add doc comment to _build_blob_map * Add unit test for synthetic V1 IDs * Remove unused import * Add an invalid value test to synthetic ID decode tests * Add manifest backfill worker back in Seems to have been removed at some point * Add a test for cached active tags * Rename test_shared to not conflict with another same-named test file Pytest doesn't like having two test modules with the same name * Have manifestbackfillworker also copy over the config_media_type if present Co-authored-by: alecmerdler --- app.py | 11 +- application.py | 1 - conf/init/supervisord_conf_create.py | 3 +- conf/nginx/http-base.conf | 3 - conf/nginx/server-base.conf.jnj | 13 - conf/supervisord.conf.jnj | 16 - config.py | 7 +- .../js/core-config-setup/core-config-setup.js | 4 - data/database.py | 25 +- .../3383aad1e992-databasemigration.yaml | 51 + .../88e64904d000-databasemigration.yaml | 36 + .../3383aad1e992_add_uploadedblob_table.py | 57 + ...0_add_new_metadata_columns_to_manifest_.py | 39 + data/model/blob.py | 84 +- data/model/gc.py | 58 +- data/model/image.py | 62 - data/model/oauth.py | 9 +- data/model/oci/blob.py | 42 +- data/model/oci/manifest.py | 238 +- data/model/oci/tag.py | 117 +- data/model/oci/test/test_oci_manifest.py | 19 +- data/model/oci/test/test_oci_tag.py | 13 +- data/model/repo_mirror.py | 12 +- data/model/repository.py | 5 +- data/model/storage.py | 87 +- data/model/tag.py | 66 - data/model/test/test_gc.py | 39 +- data/model/test/test_image.py | 109 - data/model/test/test_tag.py | 23 - data/model/user.py | 6 +- data/registry_model/__init__.py | 3 + data/registry_model/datatypes.py | 385 +-- data/registry_model/interface.py | 104 +- data/registry_model/manifestbuilder.py | 4 +- data/registry_model/registry_oci_model.py | 658 +--- data/registry_model/shared.py | 17 + data/registry_model/test/test_interface.py | 291 +- .../test/test_manifestbuilder.py | 3 +- data/registry_model/test/test_model_shared.py | 19 + data/secscan_model/__init__.py | 61 +- data/secscan_model/secscan_v2_model.py | 96 +- data/secscan_model/secscan_v4_model.py | 18 +- .../test/test_secscan_interface.py | 129 +- .../test/test_secscan_v2_model.py | 40 +- .../test/test_secscan_v4_model.py | 100 +- endpoints/api/image.py | 55 +- endpoints/api/manifest.py | 27 +- endpoints/api/repository_models_pre_oci.py | 6 +- endpoints/api/robot_models_pre_oci.py | 2 +- endpoints/api/secscan.py | 3 +- endpoints/api/tag.py | 69 +- endpoints/api/team.py | 2 +- endpoints/api/test/test_deprecated_route.py | 7 +- endpoints/api/test/test_secscan.py | 6 +- endpoints/api/test/test_tag.py | 31 +- endpoints/api/user.py | 2 +- endpoints/appr/models_cnr.py | 3 +- endpoints/secscan.py | 25 +- endpoints/test/test_anon_checked.py | 3 +- endpoints/v1/registry.py | 32 +- endpoints/v1/tag.py | 2 +- endpoints/v2/manifest.py | 2 +- endpoints/v2/test/test_blob.py | 7 +- .../v2/test/test_manifest_cornercases.py | 55 +- endpoints/verbs/__init__.py | 535 --- endpoints/verbs/test/test_security.py | 97 - endpoints/web.py | 12 - health/services.py | 1 - image/appc/__init__.py | 227 -- image/appc/test/test_appc.py | 74 - image/common.py | 89 - image/docker/schema1.py | 16 +- image/docker/schema2/list.py | 22 +- image/docker/schema2/manifest.py | 8 +- 
image/docker/schema2/test/test_list.py | 21 +- image/docker/schema2/test/test_manifest.py | 4 + image/docker/squashed.py | 149 - image/docker/test/test_schema1.py | 20 +- image/oci/index.py | 22 +- image/oci/manifest.py | 4 + image/oci/test/test_oci_index.py | 45 + image/shared/interfaces.py | 19 + initdb.py | 3 + requirements-nover.txt | 1 + requirements.txt | 2 +- .../repo-view/image-tag-tooltip.html | 11 - .../repo-view/manifest-tag-tooltip.html | 11 + .../directives/repo-view/repo-panel-tags.html | 70 +- .../directives/repo-view/repo-panel-tags.js | 100 +- test/fixtures.py | 2 - test/registry/fixtures.py | 35 +- test/registry/protocol_v1.py | 7 +- test/registry/registry_tests.py | 488 +-- test/test_api_usage.py | 15 +- test/test_secscan.py | 809 +---- test/testconfig.py | 1 - util/config/configutil.py | 9 - util/config/schema.py | 3 +- util/config/validator.py | 2 - .../validators/test/test_validate_signer.py | 24 - util/config/validators/validate_signer.py | 25 - util/registry/aufs.py | 38 - util/registry/gzipwrap.py | 62 - util/registry/queuefile.py | 87 - util/registry/queueprocess.py | 81 - util/registry/streamlayerformat.py | 76 - util/registry/tarlayerformat.py | 202 -- util/registry/test/test_queuefile.py | 118 - util/registry/test/test_streamlayerformat.py | 469 --- util/repomirror/api.py | 2 +- util/secscan/analyzer.py | 239 -- util/secscan/api.py | 316 +- util/secscan/fake.py | 132 - util/secscan/notifier.py | 205 -- util/secscan/v4/test/test_secscan.py | 5 +- util/security/signing.py | 87 - util/security/test/test_signing.py | 28 - util/test/test_workers.py | 4 - util/vendor/__init__.py | 0 util/vendor/paxtarfile.py | 2885 ----------------- util/verifybackfill.py | 83 - verbs.py | 7 - workers/manifestbackfillworker.py | 101 + workers/notificationworker/models_pre_oci.py | 5 +- .../test/test_repomirrorworker.py | 6 +- workers/security_notification_worker.py | 106 - workers/test/test_manifestbackfillworker.py | 34 + 127 files changed, 1881 insertions(+), 10002 deletions(-) create mode 100644 data/migrations/dba_operator/3383aad1e992-databasemigration.yaml create mode 100644 data/migrations/dba_operator/88e64904d000-databasemigration.yaml create mode 100644 data/migrations/versions/3383aad1e992_add_uploadedblob_table.py create mode 100644 data/migrations/versions/88e64904d000_add_new_metadata_columns_to_manifest_.py delete mode 100644 data/model/test/test_image.py delete mode 100644 data/model/test/test_tag.py create mode 100644 data/registry_model/shared.py create mode 100644 data/registry_model/test/test_model_shared.py delete mode 100644 endpoints/verbs/__init__.py delete mode 100644 endpoints/verbs/test/test_security.py delete mode 100644 image/appc/__init__.py delete mode 100644 image/appc/test/test_appc.py delete mode 100644 image/common.py delete mode 100644 image/docker/squashed.py delete mode 100644 static/directives/repo-view/image-tag-tooltip.html create mode 100644 static/directives/repo-view/manifest-tag-tooltip.html delete mode 100644 util/config/validators/test/test_validate_signer.py delete mode 100644 util/config/validators/validate_signer.py delete mode 100644 util/registry/aufs.py delete mode 100644 util/registry/gzipwrap.py delete mode 100644 util/registry/queuefile.py delete mode 100644 util/registry/queueprocess.py delete mode 100644 util/registry/streamlayerformat.py delete mode 100644 util/registry/tarlayerformat.py delete mode 100644 util/registry/test/test_queuefile.py delete mode 100644 util/registry/test/test_streamlayerformat.py delete mode 100644 
util/secscan/analyzer.py delete mode 100644 util/secscan/notifier.py delete mode 100644 util/security/signing.py delete mode 100644 util/security/test/test_signing.py delete mode 100644 util/vendor/__init__.py delete mode 100644 util/vendor/paxtarfile.py delete mode 100644 util/verifybackfill.py delete mode 100644 verbs.py create mode 100644 workers/manifestbackfillworker.py delete mode 100644 workers/security_notification_worker.py create mode 100644 workers/test/test_manifestbackfillworker.py diff --git a/app.py b/app.py index 689ce711b..07a85467d 100644 --- a/app.py +++ b/app.py @@ -66,7 +66,6 @@ from util.metrics.prometheus import PrometheusPlugin from util.repomirror.api import RepoMirrorAPI from util.tufmetadata.api import TUFMetadataAPI from util.security.instancekeys import InstanceKeys -from util.security.signing import Signer from util.greenlet_tracing import enable_tracing @@ -244,7 +243,6 @@ build_logs = BuildLogs(app) authentication = UserAuthentication(app, config_provider, OVERRIDE_CONFIG_DIRECTORY) userevents = UserEventsBuilderModule(app) superusers = SuperUserManager(app) -signer = Signer(app, config_provider) instance_keys = InstanceKeys(app) label_validator = LabelValidator(app) build_canceller = BuildCanceller(app) @@ -260,9 +258,6 @@ dockerfile_build_queue = WorkQueue( app.config["DOCKERFILE_BUILD_QUEUE_NAME"], tf, has_namespace=True ) notification_queue = WorkQueue(app.config["NOTIFICATION_QUEUE_NAME"], tf, has_namespace=True) -secscan_notification_queue = WorkQueue( - app.config["SECSCAN_NOTIFICATION_QUEUE_NAME"], tf, has_namespace=False -) export_action_logs_queue = WorkQueue( app.config["EXPORT_ACTION_LOGS_QUEUE_NAME"], tf, has_namespace=True ) @@ -277,7 +272,6 @@ all_queues = [ image_replication_queue, dockerfile_build_queue, notification_queue, - secscan_notification_queue, chunk_cleanup_queue, repository_gc_queue, namespace_gc_queue, @@ -315,10 +309,13 @@ model.config.store = storage model.config.register_repo_cleanup_callback(tuf_metadata_api.delete_metadata) secscan_model.configure(app, instance_keys, storage) -secscan_model.register_model_cleanup_callbacks(model.config) logs_model.configure(app.config) +# NOTE: We re-use the page token key here as this is just to obfuscate IDs for V1, and +# does not need to actually be secure. 
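The salt registered by the set_id_hash_salt call that follows is what feeds the hashids-based synthetic V1 image IDs described in the commit message. A minimal sketch of that ID scheme, assuming the hashids PyPI package and a hex alphabet; this is illustrative rather than the exact code added in data/registry_model/shared.py:

```python
from hashids import Hashids

# Illustrative only: turn (manifest_id, layer_index) into a stable, 64+ character
# hex-looking string usable as a Docker V1 image ID, and decode it back later.
hasher = Hashids(salt="<PAGE_TOKEN_KEY value>", alphabet="0123456789abcdef", min_length=64)

def synthesize_v1_id(manifest_id, layer_index):
    return hasher.encode(manifest_id, layer_index)

def decode_v1_id(synthetic_id):
    # hashids returns an empty tuple for strings this encoder never produced.
    return hasher.decode(synthetic_id)

assert decode_v1_id(synthesize_v1_id(1234, 2)) == (1234, 2)
```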
+registry_model.set_id_hash_salt(app.config.get("PAGE_TOKEN_KEY")) + @login_manager.user_loader def load_user(user_uuid): diff --git a/application.py b/application.py index 1a0c799fa..0a1215c0f 100644 --- a/application.py +++ b/application.py @@ -13,7 +13,6 @@ from app import app as application # Bind all of the blueprints import web -import verbs import registry import secscan diff --git a/conf/init/supervisord_conf_create.py b/conf/init/supervisord_conf_create.py index 8b1ab1ac9..9a9f12158 100644 --- a/conf/init/supervisord_conf_create.py +++ b/conf/init/supervisord_conf_create.py @@ -29,14 +29,12 @@ def default_services(): "notificationworker": {"autostart": "true"}, "queuecleanupworker": {"autostart": "true"}, "repositoryactioncounter": {"autostart": "true"}, - "security_notification_worker": {"autostart": "true"}, "securityworker": {"autostart": "true"}, "storagereplication": {"autostart": "true"}, "teamsyncworker": {"autostart": "true"}, "dnsmasq": {"autostart": "true"}, "gunicorn-registry": {"autostart": "true"}, "gunicorn-secscan": {"autostart": "true"}, - "gunicorn-verbs": {"autostart": "true"}, "gunicorn-web": {"autostart": "true"}, "ip-resolver-update-worker": {"autostart": "true"}, "jwtproxy": {"autostart": "true"}, @@ -45,6 +43,7 @@ def default_services(): "pushgateway": {"autostart": "true"}, "servicekey": {"autostart": "true"}, "repomirrorworker": {"autostart": "false"}, + "backfillmanifestworker": {"autostart": "false"}, } diff --git a/conf/nginx/http-base.conf b/conf/nginx/http-base.conf index 7b4f0ef28..70399c9ad 100644 --- a/conf/nginx/http-base.conf +++ b/conf/nginx/http-base.conf @@ -49,9 +49,6 @@ upstream web_app_server { upstream jwtproxy_secscan { server unix:/tmp/jwtproxy_secscan.sock fail_timeout=0; } -upstream verbs_app_server { - server unix:/tmp/gunicorn_verbs.sock fail_timeout=0; -} upstream registry_app_server { server unix:/tmp/gunicorn_registry.sock fail_timeout=0; } diff --git a/conf/nginx/server-base.conf.jnj b/conf/nginx/server-base.conf.jnj index e01a47a5f..2e411bf8a 100644 --- a/conf/nginx/server-base.conf.jnj +++ b/conf/nginx/server-base.conf.jnj @@ -306,19 +306,6 @@ location = /v1/_ping { return 200 'true'; } -location /c1/ { - proxy_buffering off; - - proxy_request_buffering off; - - proxy_pass http://verbs_app_server; - proxy_temp_path /tmp 1 2; - - {% if enable_rate_limits %} - limit_req zone=staticauth burst=5 nodelay; - {% endif %} -} - location /static/ { # checks for static file, if not found proxy to app alias {{static_dir}}/; diff --git a/conf/supervisord.conf.jnj b/conf/supervisord.conf.jnj index edb2d1ee1..50edf8fc6 100644 --- a/conf/supervisord.conf.jnj +++ b/conf/supervisord.conf.jnj @@ -138,14 +138,6 @@ autostart = {{ config['repositoryactioncounter']['autostart'] }} stdout_events_enabled = true stderr_events_enabled = true -[program:security_notification_worker] -environment= - PYTHONPATH=%(ENV_QUAYDIR)s -command=python -m workers.security_notification_worker -autostart = {{ config['security_notification_worker']['autostart'] }} -stdout_events_enabled = true -stderr_events_enabled = true - [program:securityworker] environment= PYTHONPATH=%(ENV_QUAYDIR)s @@ -194,14 +186,6 @@ autostart = {{ config['gunicorn-secscan']['autostart'] }} stdout_events_enabled = true stderr_events_enabled = true -[program:gunicorn-verbs] -environment= - PYTHONPATH=%(ENV_QUAYDIR)s -command=nice -n 10 gunicorn -c %(ENV_QUAYCONF)s/gunicorn_verbs.py verbs:application -autostart = {{ config['gunicorn-verbs']['autostart'] }} -stdout_events_enabled = true 
-stderr_events_enabled = true - [program:gunicorn-web] environment= PYTHONPATH=%(ENV_QUAYDIR)s diff --git a/config.py b/config.py index 1e1c30ba2..82486ac89 100644 --- a/config.py +++ b/config.py @@ -259,7 +259,6 @@ class DefaultConfig(ImmutableConfig): NOTIFICATION_QUEUE_NAME = "notification" DOCKERFILE_BUILD_QUEUE_NAME = "dockerfilebuild" REPLICATION_QUEUE_NAME = "imagestoragereplication" - SECSCAN_NOTIFICATION_QUEUE_NAME = "security_notification" CHUNK_CLEANUP_QUEUE_NAME = "chunk_cleanup" NAMESPACE_GC_QUEUE_NAME = "namespacegc" REPOSITORY_GC_QUEUE_NAME = "repositorygc" @@ -476,9 +475,6 @@ class DefaultConfig(ImmutableConfig): # The version of the API to use for the security scanner. SECURITY_SCANNER_API_VERSION = "v1" - # Namespace whitelist for security scanner. - SECURITY_SCANNER_V4_NAMESPACE_WHITELIST = [] - # Minimum number of seconds before re-indexing a manifest with the security scanner. SECURITY_SCANNER_V4_REINDEX_THRESHOLD = 300 @@ -739,3 +735,6 @@ class DefaultConfig(ImmutableConfig): # Feature Flag: Whether the repository action count worker is enabled. FEATURE_REPOSITORY_ACTION_COUNTER = True + + # TEMP FEATURE: Backfill the sizes of manifests. + FEATURE_MANIFEST_SIZE_BACKFILL = True diff --git a/config_app/js/core-config-setup/core-config-setup.js b/config_app/js/core-config-setup/core-config-setup.js index 67d27cc45..dafe0eb2d 100644 --- a/config_app/js/core-config-setup/core-config-setup.js +++ b/config_app/js/core-config-setup/core-config-setup.js @@ -74,10 +74,6 @@ angular.module("quay-config") return config.AUTHENTICATION_TYPE == 'AppToken'; }}, - {'id': 'signer', 'title': 'ACI Signing', 'condition': function(config) { - return config.FEATURE_ACI_CONVERSION; - }}, - {'id': 'github-login', 'title': 'Github (Enterprise) Authentication', 'condition': function(config) { return config.FEATURE_GITHUB_LOGIN; }}, diff --git a/data/database.py b/data/database.py index d522e71d7..c56863a7b 100644 --- a/data/database.py +++ b/data/database.py @@ -685,6 +685,7 @@ class User(BaseModel): NamespaceGeoRestriction, ManifestSecurityStatus, RepoMirrorConfig, + UploadedBlob, } | appr_classes | v22_classes @@ -888,6 +889,7 @@ class Repository(BaseModel): RepoMirrorRule, DeletedRepository, ManifestSecurityStatus, + UploadedBlob, } | appr_classes | v22_classes @@ -1115,6 +1117,7 @@ class Image(BaseModel): return list(map(int, self.ancestors.split("/")[1:-1])) +@deprecated_model class DerivedStorageForImage(BaseModel): source_image = ForeignKeyField(Image) derivative = ForeignKeyField(ImageStorage) @@ -1127,6 +1130,7 @@ class DerivedStorageForImage(BaseModel): indexes = ((("source_image", "transformation", "uniqueness_hash"), True),) +@deprecated_model class RepositoryTag(BaseModel): name = CharField() image = ForeignKeyField(Image) @@ -1391,8 +1395,8 @@ class ExternalNotificationMethod(BaseModel): class RepositoryNotification(BaseModel): uuid = CharField(default=uuid_generator, index=True) repository = ForeignKeyField(Repository) - event = ForeignKeyField(ExternalNotificationEvent) - method = ForeignKeyField(ExternalNotificationMethod) + event = EnumField(ExternalNotificationEvent) + method = EnumField(ExternalNotificationMethod) title = CharField(null=True) config_json = TextField() event_config_json = TextField(default="{}") @@ -1414,6 +1418,19 @@ class RepositoryAuthorizedEmail(BaseModel): ) +class UploadedBlob(BaseModel): + """ + UploadedBlob tracks a recently uploaded blob and prevents it from being GCed + while within the expiration window. 
+ """ + + id = BigAutoField() + repository = ForeignKeyField(Repository) + blob = ForeignKeyField(ImageStorage) + uploaded_at = DateTimeField(default=datetime.utcnow) + expires_at = DateTimeField(index=True) + + class BlobUpload(BaseModel): repository = ForeignKeyField(Repository) uuid = CharField(index=True, unique=True) @@ -1699,12 +1716,16 @@ class Manifest(BaseModel): media_type = EnumField(MediaType) manifest_bytes = TextField() + config_media_type = CharField(null=True) + layers_compressed_size = BigIntegerField(null=True) + class Meta: database = db read_only_config = read_only_config indexes = ( (("repository", "digest"), True), (("repository", "media_type"), False), + (("repository", "config_media_type"), False), ) diff --git a/data/migrations/dba_operator/3383aad1e992-databasemigration.yaml b/data/migrations/dba_operator/3383aad1e992-databasemigration.yaml new file mode 100644 index 000000000..df2173233 --- /dev/null +++ b/data/migrations/dba_operator/3383aad1e992-databasemigration.yaml @@ -0,0 +1,51 @@ + +--- +apiVersion: dbaoperator.app-sre.redhat.com/v1alpha1 +kind: DatabaseMigration +metadata: + name: 3383aad1e992 +spec: + migrationContainerSpec: + command: + - /quay-registry/quay-entrypoint.sh + - migrate + - 3383aad1e992 + image: quay.io/quay/quay + name: 3383aad1e992 + previous: !!python/tuple + - 04b9d2191450 + schemaHints: + - columns: + - name: id + nullable: false + - name: repository_id + nullable: false + - name: blob_id + nullable: false + - name: uploaded_at + nullable: false + - name: expires_at + nullable: false + operation: createTable + table: uploadedblob + - columns: + - name: blob_id + nullable: false + indexName: uploadedblob_blob_id + indexType: index + operation: createIndex + table: uploadedblob + - columns: + - name: expires_at + nullable: false + indexName: uploadedblob_expires_at + indexType: index + operation: createIndex + table: uploadedblob + - columns: + - name: repository_id + nullable: false + indexName: uploadedblob_repository_id + indexType: index + operation: createIndex + table: uploadedblob diff --git a/data/migrations/dba_operator/88e64904d000-databasemigration.yaml b/data/migrations/dba_operator/88e64904d000-databasemigration.yaml new file mode 100644 index 000000000..36d628462 --- /dev/null +++ b/data/migrations/dba_operator/88e64904d000-databasemigration.yaml @@ -0,0 +1,36 @@ + +--- +apiVersion: dbaoperator.app-sre.redhat.com/v1alpha1 +kind: DatabaseMigration +metadata: + name: 88e64904d000 +spec: + migrationContainerSpec: + command: + - /quay-registry/quay-entrypoint.sh + - migrate + - 88e64904d000 + image: quay.io/quay/quay + name: 88e64904d000 + previous: !!python/tuple + - 3383aad1e992 + schemaHints: + - columns: + - name: config_media_type + nullable: true + operation: addColumn + table: manifest + - columns: + - name: layers_compressed_size + nullable: true + operation: addColumn + table: manifest + - columns: + - name: repository_id + nullable: false + - name: config_media_type + nullable: true + indexName: manifest_repository_id_config_media_type + indexType: index + operation: createIndex + table: manifest diff --git a/data/migrations/versions/3383aad1e992_add_uploadedblob_table.py b/data/migrations/versions/3383aad1e992_add_uploadedblob_table.py new file mode 100644 index 000000000..3ebf2bce6 --- /dev/null +++ b/data/migrations/versions/3383aad1e992_add_uploadedblob_table.py @@ -0,0 +1,57 @@ +"""Add UploadedBlob table + +Revision ID: 3383aad1e992 +Revises: 04b9d2191450 +Create Date: 2020-04-21 11:45:54.837077 + +""" + +# 
revision identifiers, used by Alembic. +revision = "3383aad1e992" +down_revision = "04b9d2191450" + +import sqlalchemy as sa +from sqlalchemy.dialects import mysql + + +def upgrade(op, tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "uploadedblob", + sa.Column("id", sa.BigInteger(), nullable=False), + sa.Column("repository_id", sa.Integer(), nullable=False), + sa.Column("blob_id", sa.Integer(), nullable=False), + sa.Column("uploaded_at", sa.DateTime(), nullable=False), + sa.Column("expires_at", sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint( + ["blob_id"], ["imagestorage.id"], name=op.f("fk_uploadedblob_blob_id_imagestorage") + ), + sa.ForeignKeyConstraint( + ["repository_id"], + ["repository.id"], + name=op.f("fk_uploadedblob_repository_id_repository"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_uploadedblob")), + ) + op.create_index("uploadedblob_blob_id", "uploadedblob", ["blob_id"], unique=False) + op.create_index("uploadedblob_expires_at", "uploadedblob", ["expires_at"], unique=False) + op.create_index("uploadedblob_repository_id", "uploadedblob", ["repository_id"], unique=False) + # ### end Alembic commands ### + + # ### population of test data ### # + tester.populate_table( + "uploadedblob", + [ + ("repository_id", tester.TestDataType.Foreign("repository")), + ("blob_id", tester.TestDataType.Foreign("imagestorage")), + ("uploaded_at", tester.TestDataType.DateTime), + ("expires_at", tester.TestDataType.DateTime), + ], + ) + # ### end population of test data ### # + + +def downgrade(op, tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("uploadedblob") + # ### end Alembic commands ### diff --git a/data/migrations/versions/88e64904d000_add_new_metadata_columns_to_manifest_.py b/data/migrations/versions/88e64904d000_add_new_metadata_columns_to_manifest_.py new file mode 100644 index 000000000..3138289c1 --- /dev/null +++ b/data/migrations/versions/88e64904d000_add_new_metadata_columns_to_manifest_.py @@ -0,0 +1,39 @@ +"""Add new metadata columns to Manifest table + +Revision ID: 88e64904d000 +Revises: 3383aad1e992 +Create Date: 2020-04-21 14:00:50.376517 + +""" + +# revision identifiers, used by Alembic. +revision = "88e64904d000" +down_revision = "3383aad1e992" + +import sqlalchemy as sa + + +def upgrade(op, tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("manifest", sa.Column("config_media_type", sa.String(length=255), nullable=True)) + op.add_column("manifest", sa.Column("layers_compressed_size", sa.BigInteger(), nullable=True)) + op.create_index( + "manifest_repository_id_config_media_type", + "manifest", + ["repository_id", "config_media_type"], + unique=False, + ) + # ### end Alembic commands ### + + # ### population of test data ### # + tester.populate_column("manifest", "config_media_type", tester.TestDataType.String) + tester.populate_column("manifest", "layers_compressed_size", tester.TestDataType.Integer) + # ### end population of test data ### # + + +def downgrade(op, tables, tester): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_index("manifest_repository_id_config_media_type", table_name="manifest") + op.drop_column("manifest", "layers_compressed_size") + op.drop_column("manifest", "config_media_type") + # ### end Alembic commands ### diff --git a/data/model/blob.py b/data/model/blob.py index 53a1816c8..70ee09793 100644 --- a/data/model/blob.py +++ b/data/model/blob.py @@ -1,6 +1,6 @@ import logging -from datetime import datetime +from datetime import datetime, timedelta from uuid import uuid4 from data.model import ( @@ -14,11 +14,13 @@ from data.model import ( ) from data.database import ( Repository, + RepositoryState, Namespace, ImageStorage, Image, ImageStoragePlacement, BlobUpload, + UploadedBlob, ImageStorageLocation, db_random_func, ) @@ -27,53 +29,6 @@ from data.database import ( logger = logging.getLogger(__name__) -def get_repository_blob_by_digest(repository, blob_digest): - """ - Find the content-addressable blob linked to the specified repository. - """ - assert blob_digest - try: - storage = ( - ImageStorage.select(ImageStorage.uuid) - .join(Image) - .where( - Image.repository == repository, - ImageStorage.content_checksum == blob_digest, - ImageStorage.uploading == False, - ) - .get() - ) - - return storage_model.get_storage_by_uuid(storage.uuid) - except (ImageStorage.DoesNotExist, InvalidImageException): - raise BlobDoesNotExist("Blob does not exist with digest: {0}".format(blob_digest)) - - -def get_repo_blob_by_digest(namespace, repo_name, blob_digest): - """ - Find the content-addressable blob linked to the specified repository. - """ - assert blob_digest - try: - storage = ( - ImageStorage.select(ImageStorage.uuid) - .join(Image) - .join(Repository) - .join(Namespace, on=(Namespace.id == Repository.namespace_user)) - .where( - Repository.name == repo_name, - Namespace.username == namespace, - ImageStorage.content_checksum == blob_digest, - ImageStorage.uploading == False, - ) - .get() - ) - - return storage_model.get_storage_by_uuid(storage.uuid) - except (ImageStorage.DoesNotExist, InvalidImageException): - raise BlobDoesNotExist("Blob does not exist with digest: {0}".format(blob_digest)) - - def store_blob_record_and_temp_link( namespace, repo_name, @@ -157,16 +112,26 @@ def temp_link_blob(repository_id, blob_digest, link_expiration_s): def _temp_link_blob(repository_id, storage, link_expiration_s): """ Note: Should *always* be called by a parent under a transaction. """ - random_image_name = str(uuid4()) + try: + repository = Repository.get(id=repository_id) + except Repository.DoesNotExist: + return None - # Create a temporary link into the repository, to be replaced by the v1 metadata later - # and create a temporary tag to reference it - image = Image.create( - storage=storage, docker_image_id=random_image_name, repository=repository_id + if repository.state == RepositoryState.MARKED_FOR_DELETION: + return None + + return UploadedBlob.create( + repository=repository_id, + blob=storage, + expires_at=datetime.utcnow() + timedelta(seconds=link_expiration_s), + ) + + +def lookup_expired_uploaded_blobs(repository): + """ Looks up all expired uploaded blobs in a repository. 
""" + return UploadedBlob.select().where( + UploadedBlob.repository == repository, UploadedBlob.expires_at <= datetime.utcnow() ) - temp_tag = tag.create_temporary_hidden_tag(repository_id, image, link_expiration_s) - if temp_tag is None: - image.delete_instance() def get_stale_blob_upload(stale_timespan): @@ -192,7 +157,12 @@ def get_blob_upload_by_uuid(upload_uuid): Loads the upload with the given UUID, if any. """ try: - return BlobUpload.select().where(BlobUpload.uuid == upload_uuid).get() + return ( + BlobUpload.select(BlobUpload, ImageStorageLocation) + .join(ImageStorageLocation) + .where(BlobUpload.uuid == upload_uuid) + .get() + ) except BlobUpload.DoesNotExist: return None diff --git a/data/model/gc.py b/data/model/gc.py index f459bedc4..d4f316fe0 100644 --- a/data/model/gc.py +++ b/data/model/gc.py @@ -1,8 +1,9 @@ import logging from peewee import fn, IntegrityError +from datetime import datetime -from data.model import config, db_transaction, storage, _basequery, tag as pre_oci_tag +from data.model import config, db_transaction, storage, _basequery, tag as pre_oci_tag, blob from data.model.oci import tag as oci_tag from data.database import Repository, db_for_update from data.database import ApprTag @@ -28,8 +29,14 @@ from data.database import ( RepoMirrorConfig, RepositoryPermission, RepositoryAuthorizedEmail, + UploadedBlob, +) +from data.database import ( + RepositoryTag, + TagManifest, + Image, + DerivedStorageForImage, ) -from data.database import RepositoryTag, TagManifest, Image, DerivedStorageForImage from data.database import TagManifestToManifest, TagToRepositoryTag, TagManifestLabelMap logger = logging.getLogger(__name__) @@ -98,6 +105,7 @@ def purge_repository(repo, force=False): assert RepositoryTag.select().where(RepositoryTag.repository == repo).count() == 0 assert Manifest.select().where(Manifest.repository == repo).count() == 0 assert ManifestBlob.select().where(ManifestBlob.repository == repo).count() == 0 + assert UploadedBlob.select().where(UploadedBlob.repository == repo).count() == 0 assert ( ManifestSecurityStatus.select().where(ManifestSecurityStatus.repository == repo).count() == 0 @@ -194,7 +202,27 @@ def _purge_repository_contents(repo): if not found: break - # TODO: remove this once we're fully on the OCI data model. + # Purge any uploaded blobs that have expired. + while True: + found = False + for uploaded_blobs in _chunk_iterate_for_deletion( + UploadedBlob.select().where(UploadedBlob.repository == repo) + ): + logger.debug( + "Found %s uploaded blobs to GC under repository %s", len(uploaded_blobs), repo + ) + found = True + context = _GarbageCollectorContext(repo) + for uploaded_blob in uploaded_blobs: + logger.debug("Deleting uploaded blob %s under repository %s", uploaded_blob, repo) + assert uploaded_blob.repository_id == repo.id + _purge_uploaded_blob(uploaded_blob, context, allow_non_expired=True) + + if not found: + break + + # TODO: remove this once we've removed the foreign key constraints from RepositoryTag + # and Image. 
while True: found = False repo_tag_query = RepositoryTag.select().where(RepositoryTag.repository == repo) @@ -217,6 +245,7 @@ def _purge_repository_contents(repo): assert RepositoryTag.select().where(RepositoryTag.repository == repo).count() == 0 assert Manifest.select().where(Manifest.repository == repo).count() == 0 assert ManifestBlob.select().where(ManifestBlob.repository == repo).count() == 0 + assert UploadedBlob.select().where(UploadedBlob.repository == repo).count() == 0 # Add all remaining images to a new context. We do this here to minimize the number of images # we need to load. @@ -259,6 +288,7 @@ def garbage_collect_repo(repo): _run_garbage_collection(context) had_changes = True + # TODO: Remove once we've removed the foreign key constraints from RepositoryTag and Image. for tags in _chunk_iterate_for_deletion(pre_oci_tag.lookup_unrecoverable_tags(repo)): logger.debug("Found %s tags to GC under repository %s", len(tags), repo) context = _GarbageCollectorContext(repo) @@ -271,6 +301,18 @@ def garbage_collect_repo(repo): _run_garbage_collection(context) had_changes = True + # Purge expired uploaded blobs. + for uploaded_blobs in _chunk_iterate_for_deletion(blob.lookup_expired_uploaded_blobs(repo)): + logger.debug("Found %s uploaded blobs to GC under repository %s", len(uploaded_blobs), repo) + context = _GarbageCollectorContext(repo) + for uploaded_blob in uploaded_blobs: + logger.debug("Deleting uploaded blob %s under repository %s", uploaded_blob, repo) + assert uploaded_blob.repository_id == repo.id + _purge_uploaded_blob(uploaded_blob, context) + + _run_garbage_collection(context) + had_changes = True + return had_changes @@ -376,6 +418,16 @@ def _purge_pre_oci_tag(tag, context, allow_non_expired=False): reloaded_tag.delete_instance() +def _purge_uploaded_blob(uploaded_blob, context, allow_non_expired=False): + assert allow_non_expired or uploaded_blob.expires_at <= datetime.utcnow() + + # Add the storage to be checked. + context.add_blob_id(uploaded_blob.blob_id) + + # Delete the uploaded blob. 
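Taken together with the UploadedBlob model and the rewritten _temp_link_blob earlier in the patch, the push-side lifecycle is: an uploaded blob gets an UploadedBlob row with an expires_at timestamp, the later manifest PUT adds ManifestBlob rows, and the GC pass above drops expired UploadedBlob rows so the storage-orphan check can reclaim anything no longer referenced. A condensed sketch, assuming the models from this patch and an illustrative expiration window:

```python
from datetime import datetime, timedelta
from data.database import UploadedBlob

UPLOAD_WINDOW_S = 60 * 60  # assumed example window, not the configured value

def link_uploaded_blob(repository_id, storage_row):
    # Replaces the old temporary hidden tag + Image row: the blob stays safe from GC
    # until a manifest references it or the window expires.
    return UploadedBlob.create(
        repository=repository_id,
        blob=storage_row,
        expires_at=datetime.utcnow() + timedelta(seconds=UPLOAD_WINDOW_S),
    )

def gc_expired_uploads(repository):
    # Mirrors the pass added to garbage_collect_repo: delete expired rows, then let
    # the storage-orphan check decide whether the underlying blob can go as well.
    expired = UploadedBlob.select().where(
        UploadedBlob.repository == repository,
        UploadedBlob.expires_at <= datetime.utcnow(),
    )
    for uploaded in expired:
        uploaded.delete_instance()
```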
+ uploaded_blob.delete_instance() + + def _check_manifest_used(manifest_id): assert manifest_id is not None diff --git a/data/model/image.py b/data/model/image.py index 2a6f8115e..40c5931bb 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -23,13 +23,10 @@ from data.database import ( ImageStorage, ImageStorageLocation, RepositoryPermission, - DerivedStorageForImage, ImageStorageTransformation, User, ) -from util.canonicaljson import canonicalize - logger = logging.getLogger(__name__) @@ -554,62 +551,3 @@ def set_secscan_status(image, indexed, version): .where((Image.security_indexed_engine != version) | (Image.security_indexed != indexed)) .execute() ) != 0 - - -def _get_uniqueness_hash(varying_metadata): - if not varying_metadata: - return None - - return hashlib.sha256(json.dumps(canonicalize(varying_metadata)).encode("utf-8")).hexdigest() - - -def find_or_create_derived_storage( - source_image, transformation_name, preferred_location, varying_metadata=None -): - existing = find_derived_storage_for_image(source_image, transformation_name, varying_metadata) - if existing is not None: - return existing - - uniqueness_hash = _get_uniqueness_hash(varying_metadata) - trans = ImageStorageTransformation.get(name=transformation_name) - new_storage = storage.create_v1_storage(preferred_location) - - try: - derived = DerivedStorageForImage.create( - source_image=source_image, - derivative=new_storage, - transformation=trans, - uniqueness_hash=uniqueness_hash, - ) - except IntegrityError: - # Storage was created while this method executed. Just return the existing. - ImageStoragePlacement.delete().where(ImageStoragePlacement.storage == new_storage).execute() - new_storage.delete_instance() - return find_derived_storage_for_image(source_image, transformation_name, varying_metadata) - - return derived - - -def find_derived_storage_for_image(source_image, transformation_name, varying_metadata=None): - uniqueness_hash = _get_uniqueness_hash(varying_metadata) - - try: - found = ( - DerivedStorageForImage.select(ImageStorage, DerivedStorageForImage) - .join(ImageStorage) - .switch(DerivedStorageForImage) - .join(ImageStorageTransformation) - .where( - DerivedStorageForImage.source_image == source_image, - ImageStorageTransformation.name == transformation_name, - DerivedStorageForImage.uniqueness_hash == uniqueness_hash, - ) - .get() - ) - return found - except DerivedStorageForImage.DoesNotExist: - return None - - -def delete_derived_storage(derived_storage): - derived_storage.derivative.delete_instance(recursive=True) diff --git a/data/model/oauth.py b/data/model/oauth.py index faec5ec5c..62808ea10 100644 --- a/data/model/oauth.py +++ b/data/model/oauth.py @@ -352,8 +352,13 @@ def lookup_access_token_by_uuid(token_uuid): def lookup_access_token_for_user(user_obj, token_uuid): try: - return OAuthAccessToken.get( - OAuthAccessToken.authorized_user == user_obj, OAuthAccessToken.uuid == token_uuid + return ( + OAuthAccessToken.select(OAuthAccessToken, User) + .join(User) + .where( + OAuthAccessToken.authorized_user == user_obj, OAuthAccessToken.uuid == token_uuid + ) + .get() ) except OAuthAccessToken.DoesNotExist: return None diff --git a/data/model/oci/blob.py b/data/model/oci/blob.py index 2d4f789ba..5ed9796aa 100644 --- a/data/model/oci/blob.py +++ b/data/model/oci/blob.py @@ -1,7 +1,6 @@ -from data.database import ImageStorage, ManifestBlob +from data.database import ImageStorage, ManifestBlob, UploadedBlob from data.model import BlobDoesNotExist from data.model.storage import 
get_storage_by_uuid, InvalidImageException -from data.model.blob import get_repository_blob_by_digest as legacy_get def get_repository_blob_by_digest(repository, blob_digest): @@ -9,8 +8,34 @@ def get_repository_blob_by_digest(repository, blob_digest): Find the content-addressable blob linked to the specified repository and returns it or None if none. """ + # First try looking for a recently uploaded blob. If none found that is matching, + # check the repository itself. + storage = _lookup_blob_uploaded(repository, blob_digest) + if storage is None: + storage = _lookup_blob_in_repository(repository, blob_digest) + + return get_storage_by_uuid(storage.uuid) if storage is not None else None + + +def _lookup_blob_uploaded(repository, blob_digest): try: - storage = ( + return ( + ImageStorage.select(ImageStorage.uuid) + .join(UploadedBlob) + .where( + UploadedBlob.repository == repository, + ImageStorage.content_checksum == blob_digest, + ImageStorage.uploading == False, + ) + .get() + ) + except ImageStorage.DoesNotExist: + return None + + +def _lookup_blob_in_repository(repository, blob_digest): + try: + return ( ImageStorage.select(ImageStorage.uuid) .join(ManifestBlob) .where( @@ -20,12 +45,5 @@ def get_repository_blob_by_digest(repository, blob_digest): ) .get() ) - - return get_storage_by_uuid(storage.uuid) - except (ImageStorage.DoesNotExist, InvalidImageException): - # TODO: Remove once we are no longer using the legacy tables. - # Try the legacy call. - try: - return legacy_get(repository, blob_digest) - except BlobDoesNotExist: - return None + except ImageStorage.DoesNotExist: + return None diff --git a/data/model/oci/manifest.py b/data/model/oci/manifest.py index de99b1bf5..d8c2797f4 100644 --- a/data/model/oci/manifest.py +++ b/data/model/oci/manifest.py @@ -1,4 +1,6 @@ +import json import logging +import os from collections import namedtuple @@ -10,6 +12,10 @@ from data.database import ( ManifestBlob, ManifestLegacyImage, ManifestChild, + ImageStorage, + ImageStoragePlacement, + ImageStorageTransformation, + ImageStorageSignature, db_transaction, ) from data.model import BlobDoesNotExist @@ -17,11 +23,12 @@ from data.model.blob import get_or_create_shared_blob, get_shared_blob from data.model.oci.tag import filter_to_alive_tags, create_temporary_tag_if_necessary from data.model.oci.label import create_manifest_label from data.model.oci.retriever import RepositoryContentRetriever -from data.model.storage import lookup_repo_storages_by_content_checksum +from data.model.storage import lookup_repo_storages_by_content_checksum, create_v1_storage from data.model.image import lookup_repository_images, get_image, synthesize_v1_image from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES from image.docker.schema1 import ManifestException from image.docker.schema2.list import MalformedSchema2ManifestList +from util.canonicaljson import canonicalize from util.validation import is_json @@ -206,91 +213,17 @@ def _create_manifest( child_manifest_rows[child_manifest_info.manifest.digest] = child_manifest_info.manifest child_manifest_label_dicts.append(labels) - # Ensure all the blobs in the manifest exist. - digests = set(manifest_interface_instance.local_blob_digests) - blob_map = {} - - # If the special empty layer is required, simply load it directly. This is much faster - # than trying to load it on a per repository basis, and that is unnecessary anyway since - # this layer is predefined. 
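With the legacy fallback removed, a blob is reachable in exactly two ways: through an UploadedBlob row (just pushed, no manifest yet) or through a ManifestBlob row, and the lookup in data/model/oci/blob.py above checks them in that order. The same references (plus the still-present legacy Image rows) are what the storage GC consults; a compact sketch of that decision, assuming the peewee models from this patch:

```python
from data.database import Image, ManifestBlob, UploadedBlob

def storage_is_orphaned(storage_id):
    # A storage row survives GC if anything still points at it: a manifest's blob
    # list, a legacy Image row (kept until fully deprecated), or a not-yet-expired
    # upload tracked by UploadedBlob.
    references = (
        (ManifestBlob, ManifestBlob.blob),
        (Image, Image.storage),
        (UploadedBlob, UploadedBlob.blob),
    )
    for model, field in references:
        if model.select(model.id).where(field == storage_id).exists():
            return False
    return True
```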
- if EMPTY_LAYER_BLOB_DIGEST in digests: - digests.remove(EMPTY_LAYER_BLOB_DIGEST) - blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST) - if not blob_map[EMPTY_LAYER_BLOB_DIGEST]: - if raise_on_error: - raise CreateManifestException("Unable to retrieve specialized empty blob") - - logger.warning("Could not find the special empty blob in storage") - return None - - if digests: - query = lookup_repo_storages_by_content_checksum(repository_id, digests) - blob_map.update({s.content_checksum: s for s in query}) - for digest_str in digests: - if digest_str not in blob_map: - logger.warning( - "Unknown blob `%s` under manifest `%s` for repository `%s`", - digest_str, - manifest_interface_instance.digest, - repository_id, - ) - - if raise_on_error: - raise CreateManifestException("Unknown blob `%s`" % digest_str) - - return None - - # Special check: If the empty layer blob is needed for this manifest, add it to the - # blob map. This is necessary because Docker decided to elide sending of this special - # empty layer in schema version 2, but we need to have it referenced for GC and schema version 1. - if EMPTY_LAYER_BLOB_DIGEST not in blob_map: - try: - requires_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob( - retriever - ) - except ManifestException as ex: - if raise_on_error: - raise CreateManifestException(str(ex)) - - return None - - if requires_empty_layer is None: - if raise_on_error: - raise CreateManifestException("Could not load configuration blob") - - return None - - if requires_empty_layer: - shared_blob = get_or_create_shared_blob( - EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage - ) - assert not shared_blob.uploading - assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST - blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob - - # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy - # image. - legacy_image = None - if manifest_interface_instance.has_legacy_image: - try: - legacy_image_id = _populate_legacy_image( - repository_id, manifest_interface_instance, blob_map, retriever, raise_on_error - ) - except ManifestException as me: - logger.error("Got manifest error when populating legacy images: %s", me) - if raise_on_error: - raise CreateManifestException( - "Attempt to create an invalid manifest: %s. Please report this issue." % me - ) - - return None - - if legacy_image_id is None: - return None - - legacy_image = get_image(repository_id, legacy_image_id) - if legacy_image is None: - return None + # Build the map from required blob digests to the blob objects. + blob_map = _build_blob_map( + repository_id, + manifest_interface_instance, + retriever, + storage, + raise_on_error, + require_empty_layer=False, + ) + if blob_map is None: + return None # Create the manifest and its blobs. media_type = Manifest.media_type.get_id(manifest_interface_instance.media_type) @@ -314,6 +247,8 @@ def _create_manifest( digest=manifest_interface_instance.digest, media_type=media_type, manifest_bytes=manifest_interface_instance.bytes.as_encoded_str(), + config_media_type=manifest_interface_instance.config_media_type, + layers_compressed_size=manifest_interface_instance.layers_compressed_size, ) except IntegrityError as ie: try: @@ -339,12 +274,6 @@ def _create_manifest( if blobs_to_insert: ManifestBlob.insert_many(blobs_to_insert).execute() - # Set the legacy image (if applicable). 
- if legacy_image is not None: - ManifestLegacyImage.create( - repository=repository_id, image=legacy_image, manifest=manifest - ) - # Insert the manifest child rows (if applicable). if child_manifest_rows: children_to_insert = [ @@ -392,6 +321,131 @@ def _create_manifest( return CreatedManifest(manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply) +def _build_blob_map( + repository_id, + manifest_interface_instance, + retriever, + storage, + raise_on_error=False, + require_empty_layer=True, +): + """ Builds a map containing the digest of each blob referenced by the given manifest, + to its associated Blob row in the database. This method also verifies that the blob + is accessible under the given repository. Returns None on error (unless raise_on_error + is specified). If require_empty_layer is set to True, the method will check if the manifest + references the special shared empty layer blob and, if so, add it to the map. Otherwise, + the empty layer blob is only returned if it was *explicitly* referenced in the manifest. + This is necessary because Docker V2_2/OCI manifests can implicitly reference an empty blob + layer for image layers that only change metadata. + """ + + # Ensure all the blobs in the manifest exist. + digests = set(manifest_interface_instance.local_blob_digests) + blob_map = {} + + # If the special empty layer is required, simply load it directly. This is much faster + # than trying to load it on a per repository basis, and that is unnecessary anyway since + # this layer is predefined. + if EMPTY_LAYER_BLOB_DIGEST in digests: + digests.remove(EMPTY_LAYER_BLOB_DIGEST) + blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST) + if not blob_map[EMPTY_LAYER_BLOB_DIGEST]: + if raise_on_error: + raise CreateManifestException("Unable to retrieve specialized empty blob") + + logger.warning("Could not find the special empty blob in storage") + return None + + if digests: + query = lookup_repo_storages_by_content_checksum(repository_id, digests, with_uploads=True) + blob_map.update({s.content_checksum: s for s in query}) + for digest_str in digests: + if digest_str not in blob_map: + logger.warning( + "Unknown blob `%s` under manifest `%s` for repository `%s`", + digest_str, + manifest_interface_instance.digest, + repository_id, + ) + + if raise_on_error: + raise CreateManifestException("Unknown blob `%s`" % digest_str) + + return None + + # Special check: If the empty layer blob is needed for this manifest, add it to the + # blob map. This is necessary because Docker decided to elide sending of this special + # empty layer in schema version 2, but we need to have it referenced for schema version 1. 
+    if require_empty_layer and EMPTY_LAYER_BLOB_DIGEST not in blob_map:
+        try:
+            requires_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob(
+                retriever
+            )
+        except ManifestException as ex:
+            if raise_on_error:
+                raise CreateManifestException(str(ex))
+
+            return None
+
+        if requires_empty_layer is None:
+            if raise_on_error:
+                raise CreateManifestException("Could not load configuration blob")
+
+            return None
+
+        if requires_empty_layer:
+            shared_blob = get_or_create_shared_blob(
+                EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage
+            )
+            assert not shared_blob.uploading
+            assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
+            blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
+
+    return blob_map
+
+
+def populate_legacy_images_for_testing(manifest, manifest_interface_instance, storage):
+    """ Populates the legacy image rows for the given manifest. """
+    # NOTE: This method is only kept around for use by legacy tests that still require
+    # legacy images. As a result, we make sure we're in testing mode before we run.
+    assert os.getenv("TEST") == "true"
+
+    repository_id = manifest.repository_id
+    retriever = RepositoryContentRetriever.for_repository(repository_id, storage)
+
+    blob_map = _build_blob_map(
+        repository_id,
+        manifest_interface_instance,
+        retriever,
+        storage,
+        True,
+        require_empty_layer=True,
+    )
+    if blob_map is None:
+        return None
+
+    # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
+    # image.
+    legacy_image = None
+    if manifest_interface_instance.has_legacy_image:
+        try:
+            legacy_image_id = _populate_legacy_image(
+                repository_id, manifest_interface_instance, blob_map, retriever, True
+            )
+        except ManifestException as me:
+            raise CreateManifestException(
+                "Attempt to create an invalid manifest: %s. Please report this issue." % me
+            )
+
+        if legacy_image_id is None:
+            return None
+
+        legacy_image = get_image(repository_id, legacy_image_id)
+        if legacy_image is None:
+            return None
+
+    # Set the legacy image (if applicable).
+    if legacy_image is not None:
+        ManifestLegacyImage.create(
+            repository=repository_id, image=legacy_image, manifest=manifest
+        )
+
+
 def _populate_legacy_image(
     repository_id, manifest_interface_instance, blob_map, retriever, raise_on_error=False
 ):
diff --git a/data/model/oci/tag.py b/data/model/oci/tag.py
index 3a560c497..81b0e35cf 100644
--- a/data/model/oci/tag.py
+++ b/data/model/oci/tag.py
@@ -123,7 +123,14 @@ def list_repository_tag_history(
     Note that the returned Manifest will not contain the manifest contents.
     """
     query = (
-        Tag.select(Tag, Manifest.id, Manifest.digest, Manifest.media_type)
+        Tag.select(
+            Tag,
+            Manifest.id,
+            Manifest.digest,
+            Manifest.media_type,
+            Manifest.layers_compressed_size,
+            Manifest.config_media_type,
+        )
         .join(Manifest)
         .where(Tag.repository == repository_id)
         .order_by(Tag.lifetime_start_ms.desc(), Tag.name)
@@ -141,31 +148,14 @@ def list_repository_tag_history(
 
     if active_tags_only:
         query = filter_to_alive_tags(query)
+    else:
+        query = filter_to_visible_tags(query)
 
-    query = filter_to_visible_tags(query)
     results = list(query)
 
     return results[0:page_size], len(results) > page_size
 
 
-def get_legacy_images_for_tags(tags):
-    """
-    Returns a map from tag ID to the legacy image for the tag.
- """ - if not tags: - return {} - - query = ( - ManifestLegacyImage.select(ManifestLegacyImage, Image, ImageStorage) - .join(Image) - .join(ImageStorage) - .where(ManifestLegacyImage.manifest << [tag.manifest_id for tag in tags]) - ) - - by_manifest = {mli.manifest_id: mli.image for mli in query} - return {tag.id: by_manifest[tag.manifest_id] for tag in tags if tag.manifest_id in by_manifest} - - def find_matching_tag(repository_id, tag_names, tag_kinds=None): """ Finds an alive tag in the specified repository with one of the specified tag names and returns @@ -417,7 +407,6 @@ def delete_tags_for_manifest(manifest): """ query = Tag.select().where(Tag.manifest == manifest) query = filter_to_alive_tags(query) - query = filter_to_visible_tags(query) tags = list(query) now_ms = get_epoch_timestamp_ms() @@ -446,9 +435,8 @@ def filter_to_alive_tags(query, now_ms=None, model=Tag): if now_ms is None: now_ms = get_epoch_timestamp_ms() - return query.where((model.lifetime_end_ms >> None) | (model.lifetime_end_ms > now_ms)).where( - model.hidden == False - ) + query = query.where((model.lifetime_end_ms >> None) | (model.lifetime_end_ms > now_ms)) + return filter_to_visible_tags(query) def set_tag_expiration_sec_for_manifest(manifest_id, expiration_seconds): @@ -578,70 +566,6 @@ def tags_containing_legacy_image(image): return filter_to_alive_tags(tags) -def lookup_notifiable_tags_for_legacy_image(docker_image_id, storage_uuid, event_name): - """ - Yields any alive Tags found in repositories with an event with the given name registered and - whose legacy Image has the given docker image ID and storage UUID. - """ - event = ExternalNotificationEvent.get(name=event_name) - images = ( - Image.select() - .join(ImageStorage) - .where(Image.docker_image_id == docker_image_id, ImageStorage.uuid == storage_uuid) - ) - - for image in list(images): - # Ensure the image is under a repository that supports the event. - try: - RepositoryNotification.get(repository=image.repository_id, event=event) - except RepositoryNotification.DoesNotExist: - continue - - # If found in a repository with the valid event, yield the tag(s) that contains the image. - for tag in tags_containing_legacy_image(image): - yield tag - - -def get_tags_for_legacy_image(image_id): - """ Returns the Tag's that have the associated legacy image. - - NOTE: This is for legacy support in the old security notification worker and should - be removed once that code is no longer necessary. - """ - return filter_to_alive_tags( - Tag.select() - .distinct() - .join(Manifest) - .join(ManifestLegacyImage) - .where(ManifestLegacyImage.image == image_id) - ) - - -def _filter_has_repository_event(query, event): - """ Filters the query by ensuring the repositories returned have the given event. - - NOTE: This is for legacy support in the old security notification worker and should - be removed once that code is no longer necessary. - """ - return ( - query.join(Repository) - .join(RepositoryNotification) - .where(RepositoryNotification.event == event) - ) - - -def filter_tags_have_repository_event(query, event): - """ Filters the query by ensuring the tags live in a repository that has the given - event. Also orders the results by lifetime_start_ms. - - NOTE: This is for legacy support in the old security notification worker and should - be removed once that code is no longer necessary. 
- """ - query = _filter_has_repository_event(query, event) - query = query.switch(Tag).order_by(Tag.lifetime_start_ms.desc()) - return query - - def find_repository_with_garbage(limit_to_gc_policy_s): """ Returns a repository that has garbage (defined as an expired Tag that is past the repo's namespace's expiration window) or None if none. @@ -680,3 +604,20 @@ def find_repository_with_garbage(limit_to_gc_policy_s): return None except Repository.DoesNotExist: return None + + +def get_legacy_images_for_tags(tags): + """ + Returns a map from tag ID to the legacy image for the tag. + """ + if not tags: + return {} + + query = ( + ManifestLegacyImage.select(ManifestLegacyImage, Image) + .join(Image) + .where(ManifestLegacyImage.manifest << [tag.manifest_id for tag in tags]) + ) + + by_manifest = {mli.manifest_id: mli.image for mli in query} + return {tag.id: by_manifest[tag.manifest_id] for tag in tags if tag.manifest_id in by_manifest} diff --git a/data/model/oci/test/test_oci_manifest.py b/data/model/oci/test/test_oci_manifest.py index ec71d2ddd..4d7fadae1 100644 --- a/data/model/oci/test/test_oci_manifest.py +++ b/data/model/oci/test/test_oci_manifest.py @@ -166,6 +166,8 @@ def test_get_or_create_manifest(schema_version, initialized_db): builder.add_layer(random_digest, len(random_data.encode("utf-8"))) sample_manifest_instance = builder.build() + assert sample_manifest_instance.layers_compressed_size is not None + # Create a new manifest. created_manifest = get_or_create_manifest(repository, sample_manifest_instance, storage) created = created_manifest.manifest @@ -177,15 +179,18 @@ def test_get_or_create_manifest(schema_version, initialized_db): assert created.digest == sample_manifest_instance.digest assert created.manifest_bytes == sample_manifest_instance.bytes.as_encoded_str() assert created_manifest.labels_to_apply == expected_labels + assert created.config_media_type == sample_manifest_instance.config_media_type + assert created.layers_compressed_size == sample_manifest_instance.layers_compressed_size + + # Lookup the manifest and verify. + found = lookup_manifest(repository, created.digest, allow_dead=True) + assert found.digest == created.digest + assert found.config_media_type == created.config_media_type + assert found.layers_compressed_size == created.layers_compressed_size # Verify it has a temporary tag pointing to it. assert Tag.get(manifest=created, hidden=True).lifetime_end_ms - # Verify the legacy image. - legacy_image = get_legacy_image_for_manifest(created) - assert legacy_image is not None - assert legacy_image.storage.content_checksum == random_digest - # Verify the linked blobs. blob_digests = [ mb.blob.content_checksum @@ -295,6 +300,8 @@ def test_get_or_create_manifest_list(initialized_db): assert created_list assert created_list.media_type.name == manifest_list.media_type assert created_list.digest == manifest_list.digest + assert created_list.config_media_type == manifest_list.config_media_type + assert created_list.layers_compressed_size == manifest_list.layers_compressed_size # Ensure the child manifest links exist. child_manifests = { @@ -423,6 +430,8 @@ def test_get_or_create_manifest_with_remote_layers(initialized_db): assert created_manifest assert created_manifest.media_type.name == manifest.media_type assert created_manifest.digest == manifest.digest + assert created_manifest.config_media_type == manifest.config_media_type + assert created_manifest.layers_compressed_size == manifest.layers_compressed_size # Verify the legacy image. 
legacy_image = get_legacy_image_for_manifest(created_manifest) diff --git a/data/model/oci/test/test_oci_tag.py b/data/model/oci/test/test_oci_tag.py index 360e134b2..8aef55df0 100644 --- a/data/model/oci/test/test_oci_tag.py +++ b/data/model/oci/test/test_oci_tag.py @@ -18,7 +18,6 @@ from data.model.oci.tag import ( get_most_recent_tag, get_most_recent_tag_lifetime_start, list_alive_tags, - get_legacy_images_for_tags, filter_to_alive_tags, filter_to_visible_tags, list_repository_tag_history, @@ -92,13 +91,6 @@ def test_list_alive_tags(initialized_db): for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())): tags = list_alive_tags(tag.repository) assert tag in tags - - with assert_query_count(1): - legacy_images = get_legacy_images_for_tags(tags) - - for tag in tags: - assert ManifestLegacyImage.get(manifest=tag.manifest).image == legacy_images[tag.id] - found = True assert found @@ -154,6 +146,11 @@ def test_list_repository_tag_history(namespace_name, repo_name, initialized_db): assert results assert not has_more + assert results[0].manifest.id is not None + assert results[0].manifest.digest is not None + assert results[0].manifest.media_type is not None + assert results[0].manifest.layers_compressed_size is not None + def test_list_repository_tag_history_with_history(initialized_db): repo = get_repository("devtable", "history") diff --git a/data/model/repo_mirror.py b/data/model/repo_mirror.py index ddd846ccf..2e65f0d6d 100644 --- a/data/model/repo_mirror.py +++ b/data/model/repo_mirror.py @@ -2,7 +2,7 @@ import re from datetime import datetime, timedelta -from peewee import IntegrityError, fn +from peewee import IntegrityError, fn, JOIN from jsonschema import ValidationError from data.database import ( @@ -14,6 +14,7 @@ from data.database import ( Repository, uuid_generator, db_transaction, + User, ) from data.fields import DecryptedValue from data.model import DataModelException @@ -362,7 +363,14 @@ def get_mirror(repository): Return the RepoMirrorConfig associated with the given Repository, or None if it doesn't exist. 
""" try: - return RepoMirrorConfig.get(repository=repository) + return ( + RepoMirrorConfig.select(RepoMirrorConfig, User, RepoMirrorRule) + .join(User, JOIN.LEFT_OUTER) + .switch(RepoMirrorConfig) + .join(RepoMirrorRule) + .where(RepoMirrorConfig.repository == repository) + .get() + ) except RepoMirrorConfig.DoesNotExist: return None diff --git a/data/model/repository.py b/data/model/repository.py index d0ff34b5f..423766cd0 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -32,7 +32,6 @@ from data.database import ( RepositoryActionCount, Role, RepositoryAuthorizedEmail, - DerivedStorageForImage, Label, db_for_update, get_epoch_timestamp, @@ -500,6 +499,10 @@ def lookup_repository(repo_id): return None +def repository_visibility_name(repository): + return "public" if is_repository_public(repository) else "private" + + def is_repository_public(repository): return repository.visibility_id == _basequery.get_public_repo_visibility().id diff --git a/data/model/storage.py b/data/model/storage.py index 94b96d7ab..061d9ee87 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -25,6 +25,7 @@ from data.database import ( ApprBlob, ensure_under_transaction, ManifestBlob, + UploadedBlob, ) logger = logging.getLogger(__name__) @@ -86,7 +87,13 @@ def _is_storage_orphaned(candidate_id): except Image.DoesNotExist: pass - return True + try: + UploadedBlob.get(blob=candidate_id) + return False + except UploadedBlob.DoesNotExist: + pass + + return True def garbage_collect_storage(storage_id_whitelist): @@ -307,57 +314,65 @@ def get_layer_path_for_storage(storage_uuid, cas_path, content_checksum): return store.blob_path(content_checksum) -def lookup_repo_storages_by_content_checksum(repo, checksums, by_manifest=False): +def lookup_repo_storages_by_content_checksum(repo, checksums, with_uploads=False): """ Looks up repository storages (without placements) matching the given repository and checksum. """ + checksums = list(set(checksums)) if not checksums: return [] + # If the request is not with uploads, simply return the blobs found under the manifests + # for the repository. + if not with_uploads: + return _lookup_repo_storages_by_content_checksum(repo, checksums, ManifestBlob) + + # Otherwise, first check the UploadedBlob table and, once done, then check the ManifestBlob + # table. + found_via_uploaded = list( + _lookup_repo_storages_by_content_checksum(repo, checksums, UploadedBlob) + ) + if len(found_via_uploaded) == len(checksums): + return found_via_uploaded + + checksums_remaining = set(checksums) - { + uploaded.content_checksum for uploaded in found_via_uploaded + } + found_via_manifest = list( + _lookup_repo_storages_by_content_checksum(repo, checksums_remaining, ManifestBlob) + ) + return found_via_uploaded + found_via_manifest + + +def _lookup_repo_storages_by_content_checksum(repo, checksums, model_class): + assert checksums + # There may be many duplicates of the checksums, so for performance reasons we are going # to use a union to select just one storage with each checksum queries = [] - for counter, checksum in enumerate(set(checksums)): + for counter, checksum in enumerate(checksums): query_alias = "q{0}".format(counter) - # TODO: Remove once we have a new-style model for tracking temp uploaded blobs and - # all legacy tables have been removed. 
- if by_manifest: - candidate_subq = ( - ImageStorage.select( - ImageStorage.id, - ImageStorage.content_checksum, - ImageStorage.image_size, - ImageStorage.uuid, - ImageStorage.cas_path, - ImageStorage.uncompressed_size, - ImageStorage.uploading, - ) - .join(ManifestBlob) - .where(ManifestBlob.repository == repo, ImageStorage.content_checksum == checksum) - .limit(1) - .alias(query_alias) - ) - else: - candidate_subq = ( - ImageStorage.select( - ImageStorage.id, - ImageStorage.content_checksum, - ImageStorage.image_size, - ImageStorage.uuid, - ImageStorage.cas_path, - ImageStorage.uncompressed_size, - ImageStorage.uploading, - ) - .join(Image) - .where(Image.repository == repo, ImageStorage.content_checksum == checksum) - .limit(1) - .alias(query_alias) + candidate_subq = ( + ImageStorage.select( + ImageStorage.id, + ImageStorage.content_checksum, + ImageStorage.image_size, + ImageStorage.uuid, + ImageStorage.cas_path, + ImageStorage.uncompressed_size, + ImageStorage.uploading, ) + .join(model_class) + .where(model_class.repository == repo, ImageStorage.content_checksum == checksum) + .limit(1) + .alias(query_alias) + ) queries.append(ImageStorage.select(SQL("*")).from_(candidate_subq)) + assert queries return _basequery.reduce_as_tree(queries) diff --git a/data/model/tag.py b/data/model/tag.py index 061e96604..e64cbcf3e 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -1,75 +1,9 @@ -import logging - -from calendar import timegm -from datetime import datetime -from uuid import uuid4 - -from peewee import IntegrityError, JOIN, fn -from data.model import ( - image, - storage, - db_transaction, - DataModelException, - _basequery, - InvalidManifestException, - TagAlreadyCreatedException, - StaleTagException, - config, -) from data.database import ( RepositoryTag, Repository, - RepositoryState, - Image, - ImageStorage, Namespace, - TagManifest, - RepositoryNotification, - Label, - TagManifestLabel, get_epoch_timestamp, - db_for_update, - Manifest, - ManifestLabel, - ManifestBlob, - ManifestLegacyImage, - TagManifestToManifest, - TagManifestLabelMap, - TagToRepositoryTag, - Tag, - get_epoch_timestamp_ms, ) -from util.timedeltastring import convert_to_timedelta - - -logger = logging.getLogger(__name__) - - -def create_temporary_hidden_tag(repo, image_obj, expiration_s): - """ - Create a tag with a defined timeline, that will not appear in the UI or CLI. - - Returns the name of the temporary tag or None on error. - """ - now_ts = get_epoch_timestamp() - expire_ts = now_ts + expiration_s - tag_name = str(uuid4()) - - # Ensure the repository is not marked for deletion. - with db_transaction(): - current = Repository.get(id=repo) - if current.state == RepositoryState.MARKED_FOR_DELETION: - return None - - RepositoryTag.create( - repository=repo, - image=image_obj, - name=tag_name, - lifetime_start_ts=now_ts, - lifetime_end_ts=expire_ts, - hidden=True, - ) - return tag_name def lookup_unrecoverable_tags(repo): diff --git a/data/model/test/test_gc.py b/data/model/test/test_gc.py index eb91341df..f102155f0 100644 --- a/data/model/test/test_gc.py +++ b/data/model/test/test_gc.py @@ -30,6 +30,7 @@ from data.database import ( TagToRepositoryTag, ImageStorageLocation, RepositoryTag, + UploadedBlob, ) from data.model.oci.test.test_oci_manifest import create_manifest_for_testing from digest.digest_tools import sha256_digest @@ -61,11 +62,7 @@ def default_tag_policy(initialized_db): def _delete_temp_links(repo): """ Deletes any temp links to blobs. 
""" - for hidden in list( - RepositoryTag.select().where(RepositoryTag.hidden == True, RepositoryTag.repository == repo) - ): - hidden.delete_instance() - hidden.image.delete_instance() + UploadedBlob.delete().where(UploadedBlob.repository == repo).execute() def _populate_blob(repo, content): @@ -128,6 +125,10 @@ def move_tag(repository, tag, image_ids, expect_gc=True): repo_ref, manifest, tag, storage, raise_on_error=True ) + tag_ref = registry_model.get_repo_tag(repo_ref, tag) + manifest_ref = registry_model.get_manifest_for_tag(tag_ref) + registry_model.populate_legacy_images_for_testing(manifest_ref, storage) + if expect_gc: assert model.gc.garbage_collect_repo(repository) == expect_gc @@ -156,10 +157,17 @@ def _get_dangling_storage_count(): storage_ids = set([current.id for current in ImageStorage.select()]) referenced_by_image = set([image.storage_id for image in Image.select()]) referenced_by_manifest = set([blob.blob_id for blob in ManifestBlob.select()]) - referenced_by_derived = set( + referenced_by_uploaded = set([upload.blob_id for upload in UploadedBlob.select()]) + referenced_by_derived_image = set( [derived.derivative_id for derived in DerivedStorageForImage.select()] ) - return len(storage_ids - referenced_by_image - referenced_by_derived - referenced_by_manifest) + return len( + storage_ids + - referenced_by_image + - referenced_by_derived_image + - referenced_by_manifest + - referenced_by_uploaded + ) def _get_dangling_label_count(): @@ -199,7 +207,7 @@ def assert_gc_integrity(expect_storage_removed=True): for blob_row in ApprBlob.select(): existing_digests.add(blob_row.digest) - # Store the number of dangling storages and labels. + # Store the number of dangling objects. existing_storage_count = _get_dangling_storage_count() existing_label_count = _get_dangling_label_count() existing_manifest_count = _get_dangling_manifest_count() @@ -247,6 +255,13 @@ def assert_gc_integrity(expect_storage_removed=True): .count() ) + if shared == 0: + shared = ( + UploadedBlob.select() + .where(UploadedBlob.blob == removed_image_and_storage.storage_id) + .count() + ) + if shared == 0: with pytest.raises(ImageStorage.DoesNotExist): ImageStorage.get(id=removed_image_and_storage.storage_id) @@ -672,6 +687,10 @@ def test_images_shared_cas(default_tag_policy, initialized_db): repo_ref, manifest, "first", storage, raise_on_error=True ) + tag_ref = registry_model.get_repo_tag(repo_ref, "first") + manifest_ref = registry_model.get_manifest_for_tag(tag_ref) + registry_model.populate_legacy_images_for_testing(manifest_ref, storage) + # Store another as `second`. builder = DockerSchema1ManifestBuilder( repository.namespace_user.username, repository.name, "second" @@ -682,6 +701,10 @@ def test_images_shared_cas(default_tag_policy, initialized_db): repo_ref, manifest, "second", storage, raise_on_error=True ) + tag_ref = registry_model.get_repo_tag(repo_ref, "second") + manifest_ref = registry_model.get_manifest_for_tag(tag_ref) + registry_model.populate_legacy_images_for_testing(manifest_ref, storage) + # Manually retarget the second manifest's blob to the second row. 
try: second_blob = ManifestBlob.get(manifest=created._db_id, blob=is1) diff --git a/data/model/test/test_image.py b/data/model/test/test_image.py deleted file mode 100644 index 267c46c7c..000000000 --- a/data/model/test/test_image.py +++ /dev/null @@ -1,109 +0,0 @@ -import pytest - -from collections import defaultdict -from data.model import image, repository -from playhouse.test_utils import assert_query_count - -from test.fixtures import * - - -@pytest.fixture() -def images(initialized_db): - images = image.get_repository_images("devtable", "simple") - assert len(images) - return images - - -def test_get_image_with_storage(images, initialized_db): - for current in images: - storage_uuid = current.storage.uuid - - with assert_query_count(1): - retrieved = image.get_image_with_storage(current.docker_image_id, storage_uuid) - assert retrieved.id == current.id - assert retrieved.storage.uuid == storage_uuid - - -def test_get_parent_images(images, initialized_db): - for current in images: - if not len(current.ancestor_id_list()): - continue - - with assert_query_count(1): - parent_images = list(image.get_parent_images("devtable", "simple", current)) - - assert len(parent_images) == len(current.ancestor_id_list()) - assert set(current.ancestor_id_list()) == {i.id for i in parent_images} - - for parent in parent_images: - with assert_query_count(0): - assert parent.storage.id - - -def test_get_image(images, initialized_db): - for current in images: - repo = current.repository - - with assert_query_count(1): - found = image.get_image(repo, current.docker_image_id) - - assert found.id == current.id - - -def test_placements(images, initialized_db): - with assert_query_count(1): - placements_map = image.get_placements_for_images(images) - - for current in images: - assert current.storage.id in placements_map - - with assert_query_count(2): - expected_image, expected_placements = image.get_image_and_placements( - "devtable", "simple", current.docker_image_id - ) - - assert expected_image.id == current.id - assert len(expected_placements) == len(placements_map.get(current.storage.id)) - assert {p.id for p in expected_placements} == { - p.id for p in placements_map.get(current.storage.id) - } - - -def test_get_repo_image(images, initialized_db): - for current in images: - with assert_query_count(1): - found = image.get_repo_image("devtable", "simple", current.docker_image_id) - - assert found.id == current.id - with assert_query_count(1): - assert found.storage.id - - -def test_get_repo_image_and_storage(images, initialized_db): - for current in images: - with assert_query_count(1): - found = image.get_repo_image_and_storage("devtable", "simple", current.docker_image_id) - - assert found.id == current.id - with assert_query_count(0): - assert found.storage.id - - -def test_get_repository_images_without_placements(images, initialized_db): - ancestors_map = defaultdict(list) - for img in images: - current = img.parent - while current is not None: - ancestors_map[current.id].append(img.id) - current = current.parent - - for current in images: - repo = current.repository - - with assert_query_count(1): - found = list( - image.get_repository_images_without_placements(repo, with_ancestor=current) - ) - - assert len(found) == len(ancestors_map[current.id]) + 1 - assert {i.id for i in found} == set(ancestors_map[current.id] + [current.id]) diff --git a/data/model/test/test_tag.py b/data/model/test/test_tag.py deleted file mode 100644 index cfb7bede0..000000000 --- a/data/model/test/test_tag.py +++ /dev/null 
@@ -1,23 +0,0 @@ -import pytest - -from data.database import ( - RepositoryState, - Image, -) - -from test.fixtures import * - - -def test_create_temp_tag(initialized_db): - repo = model.repository.get_repository("devtable", "simple") - image = Image.get(repository=repo) - assert model.tag.create_temporary_hidden_tag(repo, image, 10000000) is not None - - -def test_create_temp_tag_deleted_repo(initialized_db): - repo = model.repository.get_repository("devtable", "simple") - repo.state = RepositoryState.MARKED_FOR_DELETION - repo.save() - - image = Image.get(repository=repo) - assert model.tag.create_temporary_hidden_tag(repo, image, 10000000) is None diff --git a/data/model/user.py b/data/model/user.py index 402b6a779..4c0064fa6 100644 --- a/data/model/user.py +++ b/data/model/user.py @@ -1326,7 +1326,11 @@ def get_region_locations(user): """ Returns the locations defined as preferred storage for the given user. """ - query = UserRegion.select().join(ImageStorageLocation).where(UserRegion.user == user) + query = ( + UserRegion.select(UserRegion, ImageStorageLocation) + .join(ImageStorageLocation) + .where(UserRegion.user == user) + ) return set([region.location.name for region in query]) diff --git a/data/registry_model/__init__.py b/data/registry_model/__init__.py index 26eadcd5d..140ede42f 100644 --- a/data/registry_model/__init__.py +++ b/data/registry_model/__init__.py @@ -13,6 +13,9 @@ class RegistryModelProxy(object): def __getattr__(self, attr): return getattr(self._model, attr) + def set_id_hash_salt(self, hash_salt): + self._model.set_id_hash_salt(hash_salt) + registry_model = RegistryModelProxy() logger.info("===============================") diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index fcec97953..e36a14e7d 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -1,4 +1,5 @@ import hashlib +import json from collections import namedtuple from enum import Enum, unique @@ -172,8 +173,8 @@ class Label(datatype("Label", ["key", "value", "uuid", "source_type_name", "medi key=label.key, value=label.value, uuid=label.uuid, - media_type_name=label.media_type.name, - source_type_name=label.source_type.name, + media_type_name=model.label.get_media_types()[label.media_type_id], + source_type_name=model.label.get_label_source_types()[label.source_type_id], ) @@ -189,13 +190,6 @@ class ShallowTag(datatype("ShallowTag", ["name"])): return ShallowTag(db_id=tag.id, name=tag.name) - @classmethod - def for_repository_tag(cls, repository_tag): - if repository_tag is None: - return None - - return ShallowTag(db_id=repository_tag.id, name=repository_tag.name) - @property def id(self): """ @@ -223,7 +217,7 @@ class Tag( """ @classmethod - def for_tag(cls, tag, legacy_image=None): + def for_tag(cls, tag, legacy_id_handler, manifest_row=None, legacy_image_row=None): if tag is None: return None @@ -235,55 +229,34 @@ class Tag( lifetime_end_ms=tag.lifetime_end_ms, lifetime_start_ts=tag.lifetime_start_ms // 1000, lifetime_end_ts=tag.lifetime_end_ms // 1000 if tag.lifetime_end_ms else None, - manifest_digest=tag.manifest.digest, + manifest_digest=manifest_row.digest if manifest_row else tag.manifest.digest, inputs=dict( - legacy_image=legacy_image, - manifest=tag.manifest, + legacy_id_handler=legacy_id_handler, + legacy_image_row=legacy_image_row, + manifest_row=manifest_row or tag.manifest, repository=RepositoryReference.for_id(tag.repository_id), ), ) - @classmethod - def for_repository_tag(cls, repository_tag, manifest_digest=None, 
legacy_image=None): - if repository_tag is None: - return None - - return Tag( - db_id=repository_tag.id, - name=repository_tag.name, - reversion=repository_tag.reversion, - lifetime_start_ts=repository_tag.lifetime_start_ts, - lifetime_end_ts=repository_tag.lifetime_end_ts, - lifetime_start_ms=repository_tag.lifetime_start_ts * 1000, - lifetime_end_ms=( - repository_tag.lifetime_end_ts * 1000 if repository_tag.lifetime_end_ts else None - ), - manifest_digest=manifest_digest, - inputs=dict( - legacy_image=legacy_image, - repository=RepositoryReference.for_id(repository_tag.repository_id), - ), - ) + @property + @requiresinput("manifest_row") + def _manifest_row(self, manifest_row): + """ + Returns the database Manifest object for this tag. + """ + return manifest_row @property - @requiresinput("manifest") - def _manifest(self, manifest): + @requiresinput("manifest_row") + @requiresinput("legacy_id_handler") + @optionalinput("legacy_image_row") + def manifest(self, manifest_row, legacy_id_handler, legacy_image_row): """ Returns the manifest for this tag. - - Will only apply to new-style OCI tags. """ - return manifest - - @property - @optionalinput("manifest") - def manifest(self, manifest): - """ - Returns the manifest for this tag or None if none. - - Will only apply to new-style OCI tags. - """ - return Manifest.for_manifest(manifest, self.legacy_image_if_present) + return Manifest.for_manifest( + manifest_row, legacy_id_handler, legacy_image_row=legacy_image_row + ) @property @requiresinput("repository") @@ -293,28 +266,6 @@ class Tag( """ return repository - @property - @requiresinput("legacy_image") - def legacy_image(self, legacy_image): - """ - Returns the legacy Docker V1-style image for this tag. - - Note that this will be None for tags whose manifests point to other manifests instead of - images. - """ - return legacy_image - - @property - @optionalinput("legacy_image") - def legacy_image_if_present(self, legacy_image): - """ - Returns the legacy Docker V1-style image for this tag. - - Note that this will be None for tags whose manifests point to other manifests instead of - images. - """ - return legacy_image - @property def id(self): """ @@ -322,31 +273,32 @@ class Tag( """ return self._db_id + @property + def manifest_layers_size(self): + """ Returns the compressed size of the layers of the manifest for the Tag or + None if none applicable or loaded. + """ + return self.manifest.layers_compressed_size -class Manifest(datatype("Manifest", ["digest", "media_type", "internal_manifest_bytes"])): + +class Manifest( + datatype( + "Manifest", + [ + "digest", + "media_type", + "config_media_type", + "_layers_compressed_size", + "internal_manifest_bytes", + ], + ) +): """ Manifest represents a manifest in a repository. """ @classmethod - def for_tag_manifest(cls, tag_manifest, legacy_image=None): - if tag_manifest is None: - return None - - return Manifest( - db_id=tag_manifest.id, - digest=tag_manifest.digest, - internal_manifest_bytes=Bytes.for_string_or_unicode(tag_manifest.json_data), - media_type=DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, # Always in legacy. 
- inputs=dict( - legacy_image=legacy_image, - tag_manifest=True, - repository=RepositoryReference.for_id(tag_manifest.repository_id), - ), - ) - - @classmethod - def for_manifest(cls, manifest, legacy_image): + def for_manifest(cls, manifest, legacy_id_handler, legacy_image_row=None): if manifest is None: return None @@ -361,36 +313,15 @@ class Manifest(datatype("Manifest", ["digest", "media_type", "internal_manifest_ digest=manifest.digest, internal_manifest_bytes=manifest_bytes, media_type=ManifestTable.media_type.get_name(manifest.media_type_id), + _layers_compressed_size=manifest.layers_compressed_size, + config_media_type=manifest.config_media_type, inputs=dict( - legacy_image=legacy_image, - tag_manifest=False, + legacy_id_handler=legacy_id_handler, + legacy_image_row=legacy_image_row, repository=RepositoryReference.for_id(manifest.repository_id), ), ) - @property - @requiresinput("tag_manifest") - def _is_tag_manifest(self, tag_manifest): - return tag_manifest - - @property - @requiresinput("legacy_image") - def legacy_image(self, legacy_image): - """ - Returns the legacy Docker V1-style image for this manifest. - """ - return legacy_image - - @property - @optionalinput("legacy_image") - def legacy_image_if_present(self, legacy_image): - """ - Returns the legacy Docker V1-style image for this manifest. - - Note that this will be None for manifests that point to other manifests instead of images. - """ - return legacy_image - def get_parsed_manifest(self, validate=True): """ Returns the parsed manifest for this manifest. @@ -400,17 +331,6 @@ class Manifest(datatype("Manifest", ["digest", "media_type", "internal_manifest_ self.internal_manifest_bytes, self.media_type, validate=validate ) - @property - def layers_compressed_size(self): - """ - Returns the total compressed size of the layers in the manifest or None if this could not be - computed. - """ - try: - return self.get_parsed_manifest().layers_compressed_size - except ManifestException: - return None - @property def is_manifest_list(self): """ @@ -426,9 +346,67 @@ class Manifest(datatype("Manifest", ["digest", "media_type", "internal_manifest_ """ return repository + @optionalinput("legacy_image_row") + def _legacy_image_row(self, legacy_image_row): + return legacy_image_row + + @property + def layers_compressed_size(self): + # TODO: Simplify once we've stopped writing Image rows and we've backfilled the + # sizes. + + # First check the manifest itself, as all newly written manifests will have the + # size. + if self._layers_compressed_size is not None: + return self._layers_compressed_size + + # Secondly, check for the size of the legacy Image row. + legacy_image_row = self._legacy_image_row + if legacy_image_row: + return legacy_image_row.aggregate_size + + # Otherwise, return None. + return None + + @property + @requiresinput("legacy_id_handler") + def legacy_image_root_id(self, legacy_id_handler): + """ + Returns the legacy Docker V1-style image ID for this manifest. Note that an ID will + be returned even if the manifest does not support a legacy image. + """ + return legacy_id_handler.encode(self._db_id) + + def as_manifest(self): + """ Returns the manifest or legacy image as a manifest. """ + return self + + @property + @requiresinput("legacy_id_handler") + def _legacy_id_handler(self, legacy_id_handler): + return legacy_id_handler + + def lookup_legacy_image(self, layer_index, retriever): + """ Looks up and returns the legacy image for index-th layer in this manifest + or None if none. 
The indexes here are from leaf to root, with index 0 being + the leaf. + """ + # Retrieve the schema1 manifest. If none exists, legacy images are not supported. + parsed = self.get_parsed_manifest() + if parsed is None: + return None + + schema1 = parsed.get_schema1_manifest("$namespace", "$repo", "$tag", retriever) + if schema1 is None: + return None + + return LegacyImage.for_schema1_manifest_layer_index( + self, schema1, layer_index, self._legacy_id_handler + ) + class LegacyImage( - datatype( + namedtuple( "LegacyImage", [ "docker_image_id", @@ -437,8 +415,14 @@ class LegacyImage( "command", "image_size", "aggregate_size", - "uploading", + "blob", + "blob_digest", "v1_metadata_string", + # Internal fields. + "layer_index", + "manifest", + "parsed_manifest", + "id_handler", ], ) ): @@ -447,74 +431,80 @@ class LegacyImage( """ @classmethod - def for_image(cls, image, images_map=None, tags_map=None, blob=None): - if image is None: + def for_schema1_manifest_layer_index( + cls, manifest, parsed_manifest, layer_index, id_handler, blob=None + ): + assert parsed_manifest.schema_version == 1 + layers = parsed_manifest.layers + if layer_index >= len(layers): + return None + + # NOTE: Schema1 keeps its layers in the order from base to leaf, so we have + # to reverse our lookup order. + leaf_to_base = list(reversed(layers)) + + aggregated_size = sum( + [ + l.compressed_size + for index, l in enumerate(leaf_to_base) + if index >= layer_index and l.compressed_size is not None + ] + ) + + layer = leaf_to_base[layer_index] + synthetic_layer_id = id_handler.encode(manifest._db_id, layer_index) + + # Replace the image ID and parent ID with our synethetic IDs. + try: + parsed = json.loads(layer.raw_v1_metadata) + parsed["id"] = synthetic_layer_id + if layer_index < len(leaf_to_base) - 1: + parsed["parent"] = id_handler.encode(manifest._db_id, layer_index + 1) + except (ValueError, TypeError): return None return LegacyImage( - db_id=image.id, - inputs=dict( - images_map=images_map, - tags_map=tags_map, - ancestor_id_list=image.ancestor_id_list(), - blob=blob, - ), - docker_image_id=image.docker_image_id, - created=image.created, - comment=image.comment, - command=image.command, - v1_metadata_string=image.v1_json_metadata, - image_size=image.storage.image_size, - aggregate_size=image.aggregate_size, - uploading=image.storage.uploading, + docker_image_id=synthetic_layer_id, + created=layer.v1_metadata.created, + comment=layer.v1_metadata.comment, + command=layer.v1_metadata.command, + image_size=layer.compressed_size, + aggregate_size=aggregated_size, + blob=blob, + blob_digest=layer.digest, + v1_metadata_string=json.dumps(parsed), + layer_index=layer_index, + manifest=manifest, + parsed_manifest=parsed_manifest, + id_handler=id_handler, ) - @property - def id(self): - """ - Returns the database ID of the legacy image. - """ - return self._db_id + def with_blob(self, blob): + """ Sets the blob for the legacy image. """ + return self._replace(blob=blob) @property - @requiresinput("images_map") - @requiresinput("ancestor_id_list") - def parents(self, images_map, ancestor_id_list): - """ - Returns the parent images for this image. + def parent_image_id(self): + ancestor_ids = self.ancestor_ids + if not ancestor_ids: + return None - Raises an exception if the parents have not been loaded before this property is invoked. - Parents are returned starting at the leaf image. 
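As a worked example of the ordering used above (illustrative sizes only): for a schema1 manifest whose layers compress, base to leaf, to 30, 20 and 10 bytes, leaf_to_base holds sizes [10, 20, 30]. The LegacyImage at layer_index 0 gets image_size 10 and aggregate_size 60, and its parent ID is the synthetic ID encoded for (manifest_id, 1); the LegacyImage at layer_index 2 gets image_size 30, aggregate_size 30 and no parent.
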
- """ - return [ - LegacyImage.for_image(images_map[ancestor_id], images_map=images_map) - for ancestor_id in reversed(ancestor_id_list) - if images_map.get(ancestor_id) - ] + return ancestor_ids[-1] @property - @requiresinput("blob") - def blob(self, blob): - """ - Returns the blob for this image. - - Raises an exception if the blob has not been loaded before this property is invoked. - """ - return blob + def ancestor_ids(self): + ancestor_ids = [] + for layer_index in range(self.layer_index + 1, len(self.parsed_manifest.layers)): + ancestor_ids.append(self.id_handler.encode(self.manifest._db_id, layer_index)) + return ancestor_ids @property - @requiresinput("tags_map") - def tags(self, tags_map): - """ - Returns the tags pointing to this image. + def full_image_id_chain(self): + return [self.docker_image_id] + self.ancestor_ids - Raises an exception if the tags have not been loaded before this property is invoked. - """ - tags = tags_map.get(self._db_id) - if not tags: - return [] - - return [Tag.for_tag(tag) for tag in tags] + def as_manifest(self): + """ Returns the parent manifest for the legacy image. """ + return self.manifest @unique @@ -579,7 +569,6 @@ class Blob( """ Returns the path of this blob in storage. """ - # TODO: change this to take in the storage engine? return storage_path @property @@ -591,27 +580,6 @@ class Blob( return placements -class DerivedImage(datatype("DerivedImage", ["verb", "varying_metadata", "blob"])): - """ - DerivedImage represents an image derived from a manifest via some form of verb. - """ - - @classmethod - def for_derived_storage(cls, derived, verb, varying_metadata, blob): - return DerivedImage( - db_id=derived.id, verb=verb, varying_metadata=varying_metadata, blob=blob - ) - - @property - def unique_id(self): - """ - Returns a unique ID for this derived image. - - This call will consistently produce the same unique ID across calls in the same code base. - """ - return hashlib.sha256(("%s:%s" % (self.verb, self._db_id)).encode("utf-8")).hexdigest() - - class BlobUpload( datatype( "BlobUpload", @@ -662,13 +630,6 @@ class LikelyVulnerableTag(datatype("LikelyVulnerableTag", ["layer_id", "name"])) db_id=tag.id, name=tag.name, layer_id=layer_id, inputs=dict(repository=repository) ) - @classmethod - def for_repository_tag(cls, tag, repository): - tag_layer_id = "%s.%s" % (tag.image.docker_image_id, tag.image.storage.uuid) - return LikelyVulnerableTag( - db_id=tag.id, name=tag.name, layer_id=tag_layer_id, inputs=dict(repository=repository) - ) - @property @requiresinput("repository") def repository(self, repository): diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index 26c5b1519..c70b2a7b7 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -14,16 +14,13 @@ class RegistryDataInterface(object): @abstractmethod def get_tag_legacy_image_id(self, repository_ref, tag_name, storage): """ - Returns the legacy image ID for the tag with a legacy images in the repository. - - Returns None if None. + Returns the legacy image ID for the tag in the repository or None if none. """ @abstractmethod def get_legacy_tags_map(self, repository_ref, storage): """ - Returns a map from tag name to its legacy image ID, for all tags with legacy images in the - repository. + Returns a map from tag name to its legacy image ID, for all tags in the repository. Note that this can be a *very* heavy operation. 
""" @@ -51,19 +48,14 @@ class RegistryDataInterface(object): """ @abstractmethod - def get_manifest_for_tag(self, tag, backfill_if_necessary=False, include_legacy_image=False): + def get_manifest_for_tag(self, tag): """ Returns the manifest associated with the given tag. """ @abstractmethod def lookup_manifest_by_digest( - self, - repository_ref, - manifest_digest, - allow_dead=False, - include_legacy_image=False, - require_available=False, + self, repository_ref, manifest_digest, allow_dead=False, require_available=False, ): """ Looks up the manifest with the given digest under the given repository and returns it or @@ -92,15 +84,7 @@ class RegistryDataInterface(object): """ @abstractmethod - def get_legacy_images(self, repository_ref): - """ - Returns an iterator of all the LegacyImage's defined in the matching repository. - """ - - @abstractmethod - def get_legacy_image( - self, repository_ref, docker_image_id, include_parents=False, include_blob=False - ): + def get_legacy_image(self, repository_ref, docker_image_id, storage, include_blob=False): """ Returns the matching LegacyImages under the matching repository, if any. @@ -170,12 +154,12 @@ class RegistryDataInterface(object): """ @abstractmethod - def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False): + def list_all_active_repository_tags(self, repository_ref): """ Returns a list of all the active tags in the repository. Note that this is a *HEAVY* operation on repositories with a lot of tags, and should only be - used for testing or where other more specific operations are not possible. + used for testing or legacy operations. """ @abstractmethod @@ -204,7 +188,7 @@ class RegistryDataInterface(object): """ @abstractmethod - def get_repo_tag(self, repository_ref, tag_name, include_legacy_image=False): + def get_repo_tag(self, repository_ref, tag_name): """ Returns the latest, *active* tag found in the repository, with the matching name or None if none. @@ -259,12 +243,6 @@ class RegistryDataInterface(object): previous expiration timestamp in seconds (if any), and whether the operation succeeded. """ - @abstractmethod - def get_legacy_images_owned_by_tag(self, tag): - """ - Returns all legacy images *solely owned and used* by the given tag. - """ - @abstractmethod def get_security_status(self, manifest_or_legacy_image): """ @@ -319,57 +297,6 @@ class RegistryDataInterface(object): `image.docker.types.ManifestImageLayer`. Should not be called for a manifest list. """ - @abstractmethod - def lookup_derived_image( - self, manifest, verb, storage, varying_metadata=None, include_placements=False - ): - """ - Looks up the derived image for the given manifest, verb and optional varying metadata and - returns it or None if none. - """ - - @abstractmethod - def lookup_or_create_derived_image( - self, - manifest, - verb, - storage_location, - storage, - varying_metadata=None, - include_placements=False, - ): - """ - Looks up the derived image for the given maniest, verb and optional varying metadata and - returns it. - - If none exists, a new derived image is created. - """ - - @abstractmethod - def get_derived_image_signature(self, derived_image, signer_name): - """ - Returns the signature associated with the derived image and a specific signer or None if - none. - """ - - @abstractmethod - def set_derived_image_signature(self, derived_image, signer_name, signature): - """ - Sets the calculated signature for the given derived image and signer to that specified. 
- """ - - @abstractmethod - def delete_derived_image(self, derived_image): - """ - Deletes a derived image and all of its storage. - """ - - @abstractmethod - def set_derived_image_size(self, derived_image, compressed_size): - """ - Sets the compressed size on the given derived image. - """ - @abstractmethod def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False): """ @@ -474,17 +401,14 @@ class RegistryDataInterface(object): If not possible, or an error occurs, returns None. """ - @abstractmethod - def yield_tags_for_vulnerability_notification(self, layer_id_pairs): - """ - Yields tags that contain one (or more) of the given layer ID pairs, in repositories which - have been registered for vulnerability_found notifications. - - Returns an iterator of LikelyVulnerableTag instances. - """ - @abstractmethod def find_repository_with_garbage(self, limit_to_gc_policy_s): """ Returns a repository reference to a repository that contains garbage for collection or None if none. """ + + @abstractmethod + def populate_legacy_images_for_testing(self, manifest, storage): + """ Populates legacy images for the given manifest, for testing only. This call + will fail if called under non-testing code. + """ diff --git a/data/registry_model/manifestbuilder.py b/data/registry_model/manifestbuilder.py index 4946974b2..750d9d1bc 100644 --- a/data/registry_model/manifestbuilder.py +++ b/data/registry_model/manifestbuilder.py @@ -85,8 +85,8 @@ class _ManifestBuilder(object): Returns the tags committed by this builder, if any. """ return [ - registry_model.get_repo_tag(self._repository_ref, tag_name, include_legacy_image=True) - for tag_name in list(self._builder_state.tags.keys()) + registry_model.get_repo_tag(self._repository_ref, tag_name) + for tag_name in self._builder_state.tags.keys() ] def start_layer( diff --git a/data/registry_model/registry_oci_model.py b/data/registry_model/registry_oci_model.py index cfa4c314e..3acbeaeab 100644 --- a/data/registry_model/registry_oci_model.py +++ b/data/registry_model/registry_oci_model.py @@ -25,13 +25,13 @@ from data.registry_model.datatypes import ( SecurityScanStatus, Blob, BlobUpload, - DerivedImage, ShallowTag, LikelyVulnerableTag, RepositoryReference, ManifestLayer, ) from data.registry_model.label_handlers import apply_label_to_manifest +from data.registry_model.shared import SyntheticIDHandler from image.shared import ManifestException from image.docker.schema1 import ( DOCKER_SCHEMA1_CONTENT_TYPES, @@ -42,9 +42,6 @@ from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST logger = logging.getLogger(__name__) -# The maximum size for generated manifest after which we remove extra metadata. -MAXIMUM_GENERATED_MANIFEST_SIZE = 3 * 1024 * 1024 # 3 MB - class OCIModel(RegistryDataInterface): """ @@ -52,78 +49,71 @@ class OCIModel(RegistryDataInterface): changed to support the OCI specification. 
""" + def __init__(self): + self._legacy_image_id_handler = SyntheticIDHandler() + + def set_id_hash_salt(self, id_hash_salt): + self._legacy_image_id_handler = SyntheticIDHandler(id_hash_salt) + + def _resolve_legacy_image_id_to_manifest_row(self, legacy_image_id): + decoded = self._legacy_image_id_handler.decode(legacy_image_id) + if len(decoded) == 0: + return (None, None) + + manifest_id, layer_index = decoded + if manifest_id is None: + return (None, None) + + try: + return database.Manifest.get(id=manifest_id), layer_index + except database.Manifest.DoesNotExist: + return (None, None) + + def _resolve_legacy_image_id(self, legacy_image_id): + """ Decodes the given legacy image ID and returns the manifest to which it points, + as well as the layer index for the image. If invalid, or the manifest was not found, + returns (None, None). + """ + manifest, layer_index = self._resolve_legacy_image_id_to_manifest_row(legacy_image_id) + if manifest is None: + return (None, None) + + return Manifest.for_manifest(manifest, self._legacy_image_id_handler), layer_index + def get_tag_legacy_image_id(self, repository_ref, tag_name, storage): """ - Returns the legacy image ID for the tag with a legacy images in the repository. - - Returns None if None. + Returns the legacy image ID for the tag in the repository. If there is no legacy image, + returns None. """ - tag = self.get_repo_tag(repository_ref, tag_name, include_legacy_image=True) + tag = self.get_repo_tag(repository_ref, tag_name) if tag is None: return None - if tag.legacy_image_if_present is not None: - return tag.legacy_image_if_present.docker_image_id + retriever = RepositoryContentRetriever(repository_ref.id, storage) + legacy_image = tag.manifest.lookup_legacy_image(0, retriever) + if legacy_image is None: + return None - if tag.manifest.is_manifest_list: - # See if we can lookup a schema1 legacy image. - v1_compatible = self.get_schema1_parsed_manifest(tag.manifest, "", "", "", storage) - if v1_compatible is not None: - return v1_compatible.leaf_layer_v1_image_id - - return None + return legacy_image.docker_image_id def get_legacy_tags_map(self, repository_ref, storage): """ - Returns a map from tag name to its legacy image ID, for all tags with legacy images in the + Returns a map from tag name to its legacy image ID, for all tags in the repository. Note that this can be a *very* heavy operation. """ tags = oci.tag.list_alive_tags(repository_ref._db_id) - legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) - tags_map = {} for tag in tags: - legacy_image = legacy_images_map.get(tag.id) - if legacy_image is not None: - tags_map[tag.name] = legacy_image.docker_image_id - else: - manifest = Manifest.for_manifest(tag.manifest, None) - if legacy_image is None and manifest.is_manifest_list: - # See if we can lookup a schema1 legacy image. - v1_compatible = self.get_schema1_parsed_manifest(manifest, "", "", "", storage) - if v1_compatible is not None: - v1_id = v1_compatible.leaf_layer_v1_image_id - if v1_id is not None: - tags_map[tag.name] = v1_id + root_id = Manifest.for_manifest( + tag.manifest, self._legacy_image_id_handler + ).legacy_image_root_id + if root_id is not None: + tags_map[tag.name] = root_id return tags_map - def _get_legacy_compatible_image_for_manifest(self, manifest, storage): - # Check for a legacy image directly on the manifest. 
- if not manifest.is_manifest_list: - return oci.shared.get_legacy_image_for_manifest(manifest._db_id) - - # Otherwise, lookup a legacy image associated with the v1-compatible manifest - # in the list. - try: - manifest_obj = database.Manifest.get(id=manifest._db_id) - except database.Manifest.DoesNotExist: - logger.exception("Could not find manifest for manifest `%s`", manifest._db_id) - return None - - # See if we can lookup a schema1 legacy image. - v1_compatible = self.get_schema1_parsed_manifest(manifest, "", "", "", storage) - if v1_compatible is None: - return None - - v1_id = v1_compatible.leaf_layer_v1_image_id - if v1_id is None: - return None - - return model.image.get_image(manifest_obj.repository_id, v1_id) - def find_matching_tag(self, repository_ref, tag_names): """ Finds an alive tag in the repository matching one of the given tag names and returns it or @@ -131,7 +121,7 @@ class OCIModel(RegistryDataInterface): """ found_tag = oci.tag.find_matching_tag(repository_ref._db_id, tag_names) assert found_tag is None or not found_tag.hidden - return Tag.for_tag(found_tag) + return Tag.for_tag(found_tag, self._legacy_image_id_handler) def get_most_recent_tag(self, repository_ref): """ @@ -141,27 +131,17 @@ class OCIModel(RegistryDataInterface): """ found_tag = oci.tag.get_most_recent_tag(repository_ref._db_id) assert found_tag is None or not found_tag.hidden - return Tag.for_tag(found_tag) + return Tag.for_tag(found_tag, self._legacy_image_id_handler) - def get_manifest_for_tag(self, tag, backfill_if_necessary=False, include_legacy_image=False): + def get_manifest_for_tag(self, tag): """ Returns the manifest associated with the given tag. """ assert tag is not None - - legacy_image = None - if include_legacy_image: - legacy_image = oci.shared.get_legacy_image_for_manifest(tag._manifest) - - return Manifest.for_manifest(tag._manifest, LegacyImage.for_image(legacy_image)) + return tag.manifest def lookup_manifest_by_digest( - self, - repository_ref, - manifest_digest, - allow_dead=False, - include_legacy_image=False, - require_available=False, + self, repository_ref, manifest_digest, allow_dead=False, require_available=False, ): """ Looks up the manifest with the given digest under the given repository and returns it or @@ -176,19 +156,7 @@ class OCIModel(RegistryDataInterface): if manifest is None: return None - legacy_image = None - if include_legacy_image: - try: - legacy_image_id = database.ManifestLegacyImage.get( - manifest=manifest - ).image.docker_image_id - legacy_image = self.get_legacy_image( - repository_ref, legacy_image_id, include_parents=True - ) - except database.ManifestLegacyImage.DoesNotExist: - pass - - return Manifest.for_manifest(manifest, legacy_image) + return Manifest.for_manifest(manifest, self._legacy_image_id_handler) def create_manifest_label(self, manifest, key, value, source_type_name, media_type_name=None): """ @@ -276,22 +244,15 @@ class OCIModel(RegistryDataInterface): tags = oci.tag.lookup_alive_tags_shallow(repository_ref._db_id, start_pagination_id, limit) return [ShallowTag.for_tag(tag) for tag in tags] - def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False): + def list_all_active_repository_tags(self, repository_ref): """ Returns a list of all the active tags in the repository. Note that this is a *HEAVY* operation on repositories with a lot of tags, and should only be - used for testing or where other more specific operations are not possible. + used for testing or legacy operations. 
""" tags = list(oci.tag.list_alive_tags(repository_ref._db_id)) - legacy_images_map = {} - if include_legacy_images: - legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) - - return [ - Tag.for_tag(tag, legacy_image=LegacyImage.for_image(legacy_images_map.get(tag.id))) - for tag in tags - ] + return [Tag.for_tag(tag, self._legacy_image_id_handler) for tag in tags] def list_repository_tag_history( self, @@ -312,11 +273,19 @@ class OCIModel(RegistryDataInterface): repository_ref._db_id, page, size, specific_tag_name, active_tags_only, since_time_ms ) - # TODO: do we need legacy images here? - legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) + # TODO: Remove this once the layers compressed sizes have been fully backfilled. + tags_missing_sizes = [tag for tag in tags if tag.manifest.layers_compressed_size is None] + legacy_images_map = {} + if tags_missing_sizes: + legacy_images_map = oci.tag.get_legacy_images_for_tags(tags_missing_sizes) + return ( [ - Tag.for_tag(tag, LegacyImage.for_image(legacy_images_map.get(tag.id))) + Tag.for_tag( + tag, + self._legacy_image_id_handler, + legacy_image_row=legacy_images_map.get(tag.id), + ) for tag in tags ], has_more, @@ -342,7 +311,7 @@ class OCIModel(RegistryDataInterface): return {repo_id: toSeconds(ms) for repo_id, ms in list(last_modified.items())} - def get_repo_tag(self, repository_ref, tag_name, include_legacy_image=False): + def get_repo_tag(self, repository_ref, tag_name): """ Returns the latest, *active* tag found in the repository, with the matching name or None if none. @@ -353,12 +322,7 @@ class OCIModel(RegistryDataInterface): if tag is None: return None - legacy_image = None - if include_legacy_image: - legacy_images = oci.tag.get_legacy_images_for_tags([tag]) - legacy_image = legacy_images.get(tag.id) - - return Tag.for_tag(tag, legacy_image=LegacyImage.for_image(legacy_image)) + return Tag.for_tag(tag, self._legacy_image_id_handler) def create_manifest_and_retarget_tag( self, repository_ref, manifest_interface_instance, tag_name, storage, raise_on_error=False @@ -395,9 +359,9 @@ class OCIModel(RegistryDataInterface): if tag is None: return (None, None) - legacy_image = oci.shared.get_legacy_image_for_manifest(created_manifest.manifest) - li = LegacyImage.for_image(legacy_image) - wrapped_manifest = Manifest.for_manifest(created_manifest.manifest, li) + wrapped_manifest = Manifest.for_manifest( + created_manifest.manifest, self._legacy_image_id_handler + ) # Apply any labels that should modify the created tag. if created_manifest.labels_to_apply: @@ -407,7 +371,12 @@ class OCIModel(RegistryDataInterface): # Reload the tag in case any updates were applied. tag = database.Tag.get(id=tag.id) - return (wrapped_manifest, Tag.for_tag(tag, li)) + return ( + wrapped_manifest, + Tag.for_tag( + tag, self._legacy_image_id_handler, manifest_row=created_manifest.manifest + ), + ) def retarget_tag( self, @@ -427,62 +396,37 @@ class OCIModel(RegistryDataInterface): """ with db_disallow_replica_use(): assert legacy_manifest_key is not None - manifest_id = manifest_or_legacy_image._db_id - if isinstance(manifest_or_legacy_image, LegacyImage): - # If a legacy image was required, build a new manifest for it and move the tag to that. + manifest = manifest_or_legacy_image.as_manifest() + manifest_id = manifest._db_id + + # If the manifest is a schema 1 manifest and its tag name does not match that + # specified, then we need to create a new manifest, but with that tag name. 
+ if manifest.media_type in DOCKER_SCHEMA1_CONTENT_TYPES: try: - image_row = database.Image.get(id=manifest_or_legacy_image._db_id) - except database.Image.DoesNotExist: + parsed = manifest.get_parsed_manifest() + except ManifestException: + logger.exception( + "Could not parse manifest `%s` in retarget_tag", manifest._db_id, + ) return None - manifest_instance = self._build_manifest_for_legacy_image(tag_name, image_row) - if manifest_instance is None: - return None + if parsed.tag != tag_name: + logger.debug( + "Rewriting manifest `%s` for tag named `%s`", manifest._db_id, tag_name, + ) - created = oci.manifest.get_or_create_manifest( - repository_ref._db_id, manifest_instance, storage - ) - if created is None: - return None + repository_id = repository_ref._db_id + updated = parsed.with_tag_name(tag_name, legacy_manifest_key) + assert updated.is_signed - manifest_id = created.manifest.id - else: - # If the manifest is a schema 1 manifest and its tag name does not match that - # specified, then we need to create a new manifest, but with that tag name. - if manifest_or_legacy_image.media_type in DOCKER_SCHEMA1_CONTENT_TYPES: - try: - parsed = manifest_or_legacy_image.get_parsed_manifest() - except ManifestException: - logger.exception( - "Could not parse manifest `%s` in retarget_tag", - manifest_or_legacy_image._db_id, - ) + created = oci.manifest.get_or_create_manifest(repository_id, updated, storage) + if created is None: return None - if parsed.tag != tag_name: - logger.debug( - "Rewriting manifest `%s` for tag named `%s`", - manifest_or_legacy_image._db_id, - tag_name, - ) - - repository_id = repository_ref._db_id - updated = parsed.with_tag_name(tag_name, legacy_manifest_key) - assert updated.is_signed - - created = oci.manifest.get_or_create_manifest( - repository_id, updated, storage - ) - if created is None: - return None - - manifest_id = created.manifest.id + manifest_id = created.manifest.id tag = oci.tag.retarget_tag(tag_name, manifest_id, is_reversion=is_reversion) - legacy_image = LegacyImage.for_image( - oci.shared.get_legacy_image_for_manifest(manifest_id) - ) - return Tag.for_tag(tag, legacy_image) + return Tag.for_tag(tag, self._legacy_image_id_handler) def delete_tag(self, repository_ref, tag_name): """ @@ -496,18 +440,18 @@ class OCIModel(RegistryDataInterface): msg = "Invalid repository tag '%s' on repository" % tag_name raise DataModelException(msg) - return Tag.for_tag(deleted_tag) + return Tag.for_tag(deleted_tag, self._legacy_image_id_handler) def delete_tags_for_manifest(self, manifest): """ Deletes all tags pointing to the given manifest, making the manifest inaccessible for pulling. - Returns the tags deleted, if any. Returns None on error. + Returns the tags (ShallowTag) deleted. Returns None on error. """ with db_disallow_replica_use(): deleted_tags = oci.tag.delete_tags_for_manifest(manifest._db_id) - return [Tag.for_tag(tag) for tag in deleted_tags] + return [ShallowTag.for_tag(tag) for tag in deleted_tags] def change_repository_tag_expiration(self, tag, expiration_date): """ @@ -519,75 +463,15 @@ class OCIModel(RegistryDataInterface): with db_disallow_replica_use(): return oci.tag.change_tag_expiration(tag._db_id, expiration_date) - def get_legacy_images_owned_by_tag(self, tag): - """ - Returns all legacy images *solely owned and used* by the given tag. 
- """ - tag_obj = oci.tag.get_tag_by_id(tag._db_id) - if tag_obj is None: - return None - - tags = oci.tag.list_alive_tags(tag_obj.repository_id) - legacy_images = oci.tag.get_legacy_images_for_tags(tags) - - tag_legacy_image = legacy_images.get(tag._db_id) - if tag_legacy_image is None: - return None - - assert isinstance(tag_legacy_image, Image) - - # Collect the IDs of all images that the tag uses. - tag_image_ids = set() - tag_image_ids.add(tag_legacy_image.id) - tag_image_ids.update(tag_legacy_image.ancestor_id_list()) - - # Remove any images shared by other tags. - for current in tags: - if current == tag_obj: - continue - - current_image = legacy_images.get(current.id) - if current_image is None: - continue - - tag_image_ids.discard(current_image.id) - tag_image_ids = tag_image_ids.difference(current_image.ancestor_id_list()) - if not tag_image_ids: - return [] - - if not tag_image_ids: - return [] - - # Load the images we need to return. - images = database.Image.select().where(database.Image.id << list(tag_image_ids)) - all_image_ids = set() - for image in images: - all_image_ids.add(image.id) - all_image_ids.update(image.ancestor_id_list()) - - # Build a map of all the images and their parents. - images_map = {} - all_images = database.Image.select().where(database.Image.id << list(all_image_ids)) - for image in all_images: - images_map[image.id] = image - - return [LegacyImage.for_image(image, images_map=images_map) for image in images] - def get_security_status(self, manifest_or_legacy_image): """ Returns the security status for the given manifest or legacy image or None if none. """ - image = None - - if isinstance(manifest_or_legacy_image, Manifest): - image = oci.shared.get_legacy_image_for_manifest(manifest_or_legacy_image._db_id) - if image is None: - return SecurityScanStatus.UNSUPPORTED - else: - try: - image = database.Image.get(id=manifest_or_legacy_image._db_id) - except database.Image.DoesNotExist: - return None + # TODO: change from using the Image row once we've moved all security info into MSS. + manifest_id = manifest_or_legacy_image.as_manifest()._db_id + image = oci.shared.get_legacy_image_for_manifest(manifest_id) + if image is None: + return SecurityScanStatus.UNSUPPORTED if image.security_indexed_engine is not None and image.security_indexed_engine >= 0: return ( @@ -602,22 +486,16 @@ class OCIModel(RegistryDataInterface): re-indexed. """ with db_disallow_replica_use(): - image = None + # TODO: change from using the Image row once we've moved all security info into MSS. 
+ manifest_id = manifest_or_legacy_image.as_manifest()._db_id + image = oci.shared.get_legacy_image_for_manifest(manifest_id) + if image is None: + return None - if isinstance(manifest_or_legacy_image, Manifest): - image = oci.shared.get_legacy_image_for_manifest(manifest_or_legacy_image._db_id) - if image is None: - return None - else: - try: - image = database.Image.get(id=manifest_or_legacy_image._db_id) - except database.Image.DoesNotExist: - return None - - assert image - image.security_indexed = False - image.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION - image.save() + assert image + image.security_indexed = False + image.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION + image.save() def list_manifest_layers(self, manifest, storage, include_placements=False): try: @@ -633,48 +511,9 @@ class OCIModel(RegistryDataInterface): return None return self._list_manifest_layers( - manifest_obj.repository_id, parsed, storage, include_placements, by_manifest=True + manifest_obj.repository_id, parsed, storage, include_placements ) - def lookup_derived_image( - self, manifest, verb, storage, varying_metadata=None, include_placements=False - ): - """ - Looks up the derived image for the given manifest, verb and optional varying metadata and - returns it or None if none. - """ - legacy_image = self._get_legacy_compatible_image_for_manifest(manifest, storage) - if legacy_image is None: - return None - - derived = model.image.find_derived_storage_for_image(legacy_image, verb, varying_metadata) - return self._build_derived(derived, verb, varying_metadata, include_placements) - - def lookup_or_create_derived_image( - self, - manifest, - verb, - storage_location, - storage, - varying_metadata=None, - include_placements=False, - ): - """ - Looks up the derived image for the given maniest, verb and optional varying metadata and - returns it. - - If none exists, a new derived image is created. - """ - with db_disallow_replica_use(): - legacy_image = self._get_legacy_compatible_image_for_manifest(manifest, storage) - if legacy_image is None: - return None - - derived = model.image.find_or_create_derived_storage( - legacy_image, verb, storage_location, varying_metadata - ) - return self._build_derived(derived, verb, varying_metadata, include_placements) - def set_tags_expiration_for_manifest(self, manifest, expiration_sec): """ Sets the expiration on all tags that point to the given manifest to that specified. @@ -737,9 +576,7 @@ class OCIModel(RegistryDataInterface): if created_manifest is None: return None - legacy_image = oci.shared.get_legacy_image_for_manifest(created_manifest.manifest) - li = LegacyImage.for_image(legacy_image) - return Manifest.for_manifest(created_manifest.manifest, li) + return Manifest.for_manifest(created_manifest.manifest, self._legacy_image_id_handler) def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False): """ @@ -777,11 +614,7 @@ class OCIModel(RegistryDataInterface): specified). 
""" return self._list_manifest_layers( - repository_ref._db_id, - parsed_manifest, - storage, - include_placements=include_placements, - by_manifest=True, + repository_ref._db_id, parsed_manifest, storage, include_placements=include_placements, ) def get_manifest_local_blobs(self, manifest, include_placements=False): @@ -794,25 +627,9 @@ class OCIModel(RegistryDataInterface): return None return self._get_manifest_local_blobs( - manifest, manifest_row.repository_id, include_placements, by_manifest=True + manifest, manifest_row.repository_id, include_placements ) - def yield_tags_for_vulnerability_notification(self, layer_id_pairs): - """ - Yields tags that contain one (or more) of the given layer ID pairs, in repositories which - have been registered for vulnerability_found notifications. - - Returns an iterator of LikelyVulnerableTag instances. - """ - for docker_image_id, storage_uuid in layer_id_pairs: - tags = oci.tag.lookup_notifiable_tags_for_legacy_image( - docker_image_id, storage_uuid, "vulnerability_found" - ) - for tag in tags: - yield LikelyVulnerableTag.for_tag( - tag, tag.repository, docker_image_id, storage_uuid - ) - def find_repository_with_garbage(self, limit_to_gc_policy_s): repo = model.oci.tag.find_repository_with_garbage(limit_to_gc_policy_s) if repo is None: @@ -849,66 +666,6 @@ class OCIModel(RegistryDataInterface): namespace = model.user.get_namespace_user(namespace_name) return namespace is not None and namespace.enabled - def get_derived_image_signature(self, derived_image, signer_name): - """ - Returns the signature associated with the derived image and a specific signer or None if - none. - """ - try: - derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) - except database.DerivedStorageForImage.DoesNotExist: - return None - - storage = derived_storage.derivative - signature_entry = model.storage.lookup_storage_signature(storage, signer_name) - if signature_entry is None: - return None - - return signature_entry.signature - - def set_derived_image_signature(self, derived_image, signer_name, signature): - """ - Sets the calculated signature for the given derived image and signer to that specified. - """ - with db_disallow_replica_use(): - try: - derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) - except database.DerivedStorageForImage.DoesNotExist: - return None - - storage = derived_storage.derivative - signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name) - signature_entry.signature = signature - signature_entry.uploading = False - signature_entry.save() - - def delete_derived_image(self, derived_image): - """ - Deletes a derived image and all of its storage. - """ - with db_disallow_replica_use(): - try: - derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) - except database.DerivedStorageForImage.DoesNotExist: - return None - - model.image.delete_derived_storage(derived_storage) - - def set_derived_image_size(self, derived_image, compressed_size): - """ - Sets the compressed size on the given derived image. 
- """ - with db_disallow_replica_use(): - try: - derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id) - except database.DerivedStorageForImage.DoesNotExist: - return None - - storage_entry = derived_storage.derivative - storage_entry.image_size = compressed_size - storage_entry.uploading = False - storage_entry.save() - def lookup_cached_active_repository_tags( self, model_cache, repository_ref, start_pagination_id, limit ): @@ -1098,68 +855,41 @@ class OCIModel(RegistryDataInterface): ) return bool(storage) - def get_legacy_images(self, repository_ref): + def get_legacy_image(self, repository_ref, docker_image_id, storage, include_blob=False): """ - Returns an iterator of all the LegacyImage's defined in the matching repository. - """ - repo = model.repository.lookup_repository(repository_ref._db_id) - if repo is None: - return None - - all_images = model.image.get_repository_images_without_placements(repo) - all_images_map = {image.id: image for image in all_images} - - all_tags = model.oci.tag.list_alive_tags(repo) - tags_by_image_id = defaultdict(list) - for tag in all_tags: - try: - mli = database.ManifestLegacyImage.get(manifest=tag.manifest_id) - tags_by_image_id[mli.image_id].append(tag) - except database.ManifestLegacyImage.DoesNotExist: - continue - - return [ - LegacyImage.for_image(image, images_map=all_images_map, tags_map=tags_by_image_id) - for image in all_images - ] - - def get_legacy_image( - self, repository_ref, docker_image_id, include_parents=False, include_blob=False - ): - """ - Returns the matching LegacyImages under the matching repository, if any. + Returns the matching LegacyImage under the matching repository, if any. If none, returns None. """ - repo = model.repository.lookup_repository(repository_ref._db_id) - if repo is None: + retriever = RepositoryContentRetriever(repository_ref._db_id, storage) + + # Resolves the manifest and the layer index from the synthetic ID. + manifest, layer_index = self._resolve_legacy_image_id(docker_image_id) + if manifest is None: return None - image = model.image.get_image(repository_ref._db_id, docker_image_id) - if image is None: - return None + # Lookup the legacy image for the index. + legacy_image = manifest.lookup_legacy_image(layer_index, retriever) + if legacy_image is None or not include_blob: + return legacy_image - parent_images_map = None - if include_parents: - parent_images = model.image.get_parent_images( - repo.namespace_user.username, repo.name, image + # If a blob was requested, load it into the legacy image. + return legacy_image.with_blob( + self.get_repo_blob_by_digest( + repository_ref, legacy_image.blob_digest, include_placements=True ) - parent_images_map = {image.id: image for image in parent_images} + ) - blob = None - if include_blob: - placements = list(model.storage.get_storage_locations(image.storage.uuid)) - blob = Blob.for_image_storage( - image.storage, - storage_path=model.storage.get_layer_path(image.storage), - placements=placements, - ) + def populate_legacy_images_for_testing(self, manifest, storage): + """ Populates legacy images for the given manifest, for testing only. This call + will fail if called under non-testing code. 
+ """ + manifest_row = database.Manifest.get(id=manifest._db_id) + oci.manifest.populate_legacy_images_for_testing( + manifest_row, manifest.get_parsed_manifest(), storage + ) - return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob) - - def _get_manifest_local_blobs( - self, manifest, repo_id, include_placements=False, by_manifest=False - ): + def _get_manifest_local_blobs(self, manifest, repo_id, include_placements=False): parsed = manifest.get_parsed_manifest() if parsed is None: return None @@ -1168,9 +898,7 @@ class OCIModel(RegistryDataInterface): if not len(local_blob_digests): return [] - blob_query = self._lookup_repo_storages_by_content_checksum( - repo_id, local_blob_digests, by_manifest=by_manifest - ) + blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, local_blob_digests) blobs = [] for image_storage in blob_query: placements = None @@ -1186,9 +914,7 @@ class OCIModel(RegistryDataInterface): return blobs - def _list_manifest_layers( - self, repo_id, parsed, storage, include_placements=False, by_manifest=False - ): + def _list_manifest_layers(self, repo_id, parsed, storage, include_placements=False): """ Returns an *ordered list* of the layers found in the manifest, starting at the base and working towards the leaf, including the associated Blob and its placements (if specified). @@ -1206,9 +932,7 @@ class OCIModel(RegistryDataInterface): blob_digests.append(EMPTY_LAYER_BLOB_DIGEST) if blob_digests: - blob_query = self._lookup_repo_storages_by_content_checksum( - repo_id, blob_digests, by_manifest=by_manifest - ) + blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, blob_digests) storage_map = {blob.content_checksum: blob for blob in blob_query} layers = parsed.get_layers(retriever) @@ -1246,84 +970,6 @@ class OCIModel(RegistryDataInterface): return manifest_layers - def _build_derived(self, derived, verb, varying_metadata, include_placements): - if derived is None: - return None - - derived_storage = derived.derivative - placements = None - if include_placements: - placements = list(model.storage.get_storage_locations(derived_storage.uuid)) - - blob = Blob.for_image_storage( - derived_storage, - storage_path=model.storage.get_layer_path(derived_storage), - placements=placements, - ) - - return DerivedImage.for_derived_storage(derived, verb, varying_metadata, blob) - - def _build_manifest_for_legacy_image(self, tag_name, legacy_image_row): - import features - - from app import app, docker_v2_signing_key - - repo = legacy_image_row.repository - namespace_name = repo.namespace_user.username - repo_name = repo.name - - # Find the v1 metadata for this image and its parents. - try: - parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row) - except model.DataModelException: - logger.exception( - "Could not load parent images for legacy image %s", legacy_image_row.id - ) - return None - - # If the manifest is being generated under the library namespace, then we make its namespace - # empty. 
- manifest_namespace = namespace_name - if features.LIBRARY_SUPPORT and namespace_name == app.config["LIBRARY_NAMESPACE"]: - manifest_namespace = "" - - # Create and populate the manifest builder - builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag_name) - - # Add the leaf layer - builder.add_layer( - legacy_image_row.storage.content_checksum, legacy_image_row.v1_json_metadata - ) - if legacy_image_row.storage.uploading: - logger.error("Cannot add an uploading storage row: %s", legacy_image_row.storage.id) - return None - - for parent_image in parents: - if parent_image.storage.uploading: - logger.error("Cannot add an uploading storage row: %s", legacy_image_row.storage.id) - return None - - builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata) - - try: - built_manifest = builder.build(docker_v2_signing_key) - - # If the generated manifest is greater than the maximum size, regenerate it with - # intermediate metadata layers stripped down to their bare essentials. - if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE: - built_manifest = builder.with_metadata_removed().build(docker_v2_signing_key) - - if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE: - logger.error("Legacy image is too large to generate manifest") - return None - - return built_manifest - except ManifestException as me: - logger.exception( - "Got exception when trying to build manifest for legacy image %s", legacy_image_row - ) - return None - def _get_shared_storage(self, blob_digest): """ Returns an ImageStorage row for the blob digest if it is a globally shared storage. @@ -1337,7 +983,7 @@ class OCIModel(RegistryDataInterface): return None - def _lookup_repo_storages_by_content_checksum(self, repo, checksums, by_manifest=False): + def _lookup_repo_storages_by_content_checksum(self, repo, checksums): checksums = set(checksums) # Load any shared storages first. 
@@ -1350,11 +996,7 @@ class OCIModel(RegistryDataInterface): found = [] if checksums: - found = list( - model.storage.lookup_repo_storages_by_content_checksum( - repo, checksums, by_manifest=by_manifest - ) - ) + found = list(model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)) return found + extra_storages diff --git a/data/registry_model/shared.py b/data/registry_model/shared.py new file mode 100644 index 000000000..67b2821c9 --- /dev/null +++ b/data/registry_model/shared.py @@ -0,0 +1,17 @@ +import uuid + +from hashids import Hashids + + +class SyntheticIDHandler(object): + def __init__(self, hash_salt=None): + self.hash_salt = hash_salt or str(uuid.uuid4()) + self.hashids = Hashids(alphabet="0123456789abcdef", min_length=64, salt=self.hash_salt) + + def encode(self, manifest_id, layer_index=0): + encoded = self.hashids.encode(manifest_id, layer_index) + assert len(encoded) == 64 + return encoded + + def decode(self, synthetic_v1_id): + return self.hashids.decode(synthetic_v1_id) diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index 16c894e8b..f126b7c8f 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -23,7 +23,6 @@ from data.database import ( ManifestLabel, TagManifest, TagManifestLabel, - DerivedStorageForImage, Tag, TagToRepositoryTag, ImageStorageLocation, @@ -32,6 +31,7 @@ from data.cache.impl import InMemoryDataModelCache from data.registry_model.registry_oci_model import OCIModel from data.registry_model.datatypes import RepositoryReference from data.registry_model.blobuploader import upload_blob, BlobUploadSettings +from data.model.oci.retriever import RepositoryContentRetriever from data.model.blob import store_blob_record_and_temp_link from image.shared.types import ManifestImageLayer from image.docker.schema1 import ( @@ -78,7 +78,6 @@ def test_find_matching_tag(names, expected, registry_model): assert found is None else: assert found.name in expected - assert found.repository.namespace_name == "devtable" assert found.repository.name == "simple" @@ -120,13 +119,9 @@ def test_lookup_manifests(repo_namespace, repo_name, registry_model): repository_ref = RepositoryReference.for_repo_obj(repo) found_tag = registry_model.find_matching_tag(repository_ref, ["latest"]) found_manifest = registry_model.get_manifest_for_tag(found_tag) - found = registry_model.lookup_manifest_by_digest( - repository_ref, found_manifest.digest, include_legacy_image=True - ) + found = registry_model.lookup_manifest_by_digest(repository_ref, found_manifest.digest) assert found._db_id == found_manifest._db_id assert found.digest == found_manifest.digest - assert found.legacy_image - assert found.legacy_image.parents schema1_parsed = registry_model.get_schema1_parsed_manifest(found, "foo", "bar", "baz", storage) assert schema1_parsed is not None @@ -211,26 +206,24 @@ def test_batch_labels(registry_model): ) def test_repository_tags(repo_namespace, repo_name, registry_model): repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) - tags = registry_model.list_all_active_repository_tags( - repository_ref, include_legacy_images=True - ) + tags = registry_model.list_all_active_repository_tags(repository_ref) assert len(tags) tags_map = registry_model.get_legacy_tags_map(repository_ref, storage) for tag in tags: - found_tag = registry_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) + found_tag = registry_model.get_repo_tag(repository_ref, 
tag.name) assert found_tag == tag - if found_tag.legacy_image is None: - continue - + retriever = RepositoryContentRetriever(repository_ref.id, storage) + legacy_image = tag.manifest.lookup_legacy_image(0, retriever) found_image = registry_model.get_legacy_image( - repository_ref, found_tag.legacy_image.docker_image_id + repository_ref, found_tag.manifest.legacy_image_root_id, storage ) - assert found_image == found_tag.legacy_image - assert tag.name in tags_map - assert tags_map[tag.name] == found_image.docker_image_id + + if found_image is not None: + assert found_image.docker_image_id == legacy_image.docker_image_id + assert tags_map[tag.name] == found_image.docker_image_id @pytest.mark.parametrize( @@ -242,12 +235,19 @@ def test_repository_tags(repo_namespace, repo_name, registry_model): ("public", "publicrepo", 1, False), ], ) -def test_repository_tag_history(namespace, name, expected_tag_count, has_expired, registry_model): +@pytest.mark.parametrize("with_size_fallback", [False, True,]) +def test_repository_tag_history( + namespace, name, expected_tag_count, has_expired, registry_model, with_size_fallback +): # Pre-cache media type loads to ensure consistent query count. Manifest.media_type.get_name(1) + # If size fallback is requested, delete the sizes on the manifest rows. + if with_size_fallback: + Manifest.update(layers_compressed_size=None).execute() + repository_ref = registry_model.lookup_repository(namespace, name) - with assert_query_count(2): + with assert_query_count(2 if with_size_fallback else 1): history, has_more = registry_model.list_repository_tag_history(repository_ref) assert not has_more assert len(history) == expected_tag_count @@ -323,9 +323,7 @@ def test_delete_tags(repo_namespace, repo_name, via_manifest, registry_model): # Make sure the tag is no longer found. with assert_query_count(1): - found_tag = registry_model.get_repo_tag( - repository_ref, tag.name, include_legacy_image=True - ) + found_tag = registry_model.get_repo_tag(repository_ref, tag.name) assert found_tag is None # Ensure all tags have been deleted. @@ -347,7 +345,9 @@ def test_retarget_tag_history(use_manifest, registry_model): repository_ref, history[0].manifest_digest, allow_dead=True ) else: - manifest_or_legacy_image = history[0].legacy_image + manifest_or_legacy_image = registry_model.get_legacy_image( + repository_ref, history[0].manifest.legacy_image_root_id, storage + ) # Retarget the tag. assert manifest_or_legacy_image @@ -364,7 +364,7 @@ def test_retarget_tag_history(use_manifest, registry_model): if use_manifest: assert updated_tag.manifest_digest == manifest_or_legacy_image.digest else: - assert updated_tag.legacy_image == manifest_or_legacy_image + assert updated_tag.manifest.legacy_image_root_id == manifest_or_legacy_image.docker_image_id # Ensure history has been updated. 
new_history, _ = registry_model.list_repository_tag_history(repository_ref) @@ -388,15 +388,17 @@ def test_change_repository_tag_expiration(registry_model): def test_get_security_status(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - tags = registry_model.list_all_active_repository_tags( - repository_ref, include_legacy_images=True - ) + tags = registry_model.list_all_active_repository_tags(repository_ref) assert len(tags) for tag in tags: - assert registry_model.get_security_status(tag.legacy_image) - registry_model.reset_security_status(tag.legacy_image) - assert registry_model.get_security_status(tag.legacy_image) + legacy_image = registry_model.get_legacy_image( + repository_ref, tag.manifest.legacy_image_root_id, storage + ) + assert legacy_image + assert registry_model.get_security_status(legacy_image) + registry_model.reset_security_status(legacy_image) + assert registry_model.get_security_status(legacy_image) @pytest.fixture() @@ -504,145 +506,6 @@ def test_manifest_remote_layers(oci_model): assert layers[0].blob is None -def test_derived_image(registry_model): - # Clear all existing derived storage. - DerivedStorageForImage.delete().execute() - - repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest") - manifest = registry_model.get_manifest_for_tag(tag) - - # Ensure the squashed image doesn't exist. - assert registry_model.lookup_derived_image(manifest, "squash", storage, {}) is None - - # Create a new one. - squashed = registry_model.lookup_or_create_derived_image( - manifest, "squash", "local_us", storage, {} - ) - assert ( - registry_model.lookup_or_create_derived_image(manifest, "squash", "local_us", storage, {}) - == squashed - ) - assert squashed.unique_id - - # Check and set the size. - assert squashed.blob.compressed_size is None - registry_model.set_derived_image_size(squashed, 1234) - - found = registry_model.lookup_derived_image(manifest, "squash", storage, {}) - assert found.blob.compressed_size == 1234 - assert found.unique_id == squashed.unique_id - - # Ensure its returned now. - assert found == squashed - - # Ensure different metadata results in a different derived image. - found = registry_model.lookup_derived_image(manifest, "squash", storage, {"foo": "bar"}) - assert found is None - - squashed_foo = registry_model.lookup_or_create_derived_image( - manifest, "squash", "local_us", storage, {"foo": "bar"} - ) - assert squashed_foo != squashed - - found = registry_model.lookup_derived_image(manifest, "squash", storage, {"foo": "bar"}) - assert found == squashed_foo - - assert squashed.unique_id != squashed_foo.unique_id - - # Lookup with placements. - squashed = registry_model.lookup_or_create_derived_image( - manifest, "squash", "local_us", storage, {}, include_placements=True - ) - assert squashed.blob.placements - - # Delete the derived image. 
- registry_model.delete_derived_image(squashed) - assert registry_model.lookup_derived_image(manifest, "squash", storage, {}) is None - - -def test_derived_image_signatures(registry_model): - repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest") - manifest = registry_model.get_manifest_for_tag(tag) - - derived = registry_model.lookup_or_create_derived_image( - manifest, "squash", "local_us", storage, {} - ) - assert derived - - registry_model.set_derived_image_signature(derived, "gpg2", "foo") - assert registry_model.get_derived_image_signature(derived, "gpg2") == "foo" - - -@pytest.mark.parametrize( - "manifest_builder, list_builder", - [ - (DockerSchema2ManifestBuilder, DockerSchema2ManifestListBuilder), - (OCIManifestBuilder, OCIIndexBuilder), - ], -) -def test_derived_image_for_manifest_list(manifest_builder, list_builder, oci_model): - # Clear all existing derived storage. - DerivedStorageForImage.delete().execute() - - # Create a config blob for testing. - config_json = json.dumps( - { - "config": {}, - "architecture": "amd64", - "os": "linux", - "rootfs": {"type": "layers", "diff_ids": []}, - "history": [ - {"created": "2018-04-03T18:37:09.284840891Z", "created_by": "do something",}, - ], - } - ) - - app_config = {"TESTING": True} - repository_ref = oci_model.lookup_repository("devtable", "simple") - with upload_blob(repository_ref, storage, BlobUploadSettings(500, 500)) as upload: - upload.upload_chunk(app_config, BytesIO(config_json.encode("utf-8"))) - blob = upload.commit_to_blob(app_config) - - # Create the manifest in the repo. - builder = manifest_builder() - builder.set_config_digest(blob.digest, blob.compressed_size) - builder.add_layer(blob.digest, blob.compressed_size) - amd64_manifest = builder.build() - - oci_model.create_manifest_and_retarget_tag( - repository_ref, amd64_manifest, "submanifest", storage, raise_on_error=True - ) - - # Create a manifest list, pointing to at least one amd64+linux manifest. - builder = list_builder() - builder.add_manifest(amd64_manifest, "amd64", "linux") - manifestlist = builder.build() - - oci_model.create_manifest_and_retarget_tag( - repository_ref, manifestlist, "listtag", storage, raise_on_error=True - ) - - manifest = oci_model.get_manifest_for_tag(oci_model.get_repo_tag(repository_ref, "listtag")) - assert manifest - assert manifest.get_parsed_manifest().is_manifest_list - - # Ensure the squashed image doesn't exist. - assert oci_model.lookup_derived_image(manifest, "squash", storage, {}) is None - - # Create a new one. - squashed = oci_model.lookup_or_create_derived_image(manifest, "squash", "local_us", storage, {}) - assert squashed.unique_id - assert ( - oci_model.lookup_or_create_derived_image(manifest, "squash", "local_us", storage, {}) - == squashed - ) - - # Perform lookup. 
- assert oci_model.lookup_derived_image(manifest, "squash", storage, {}) == squashed - - def test_blob_uploads(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") @@ -763,13 +626,11 @@ def test_get_cached_repo_blob(registry_model): def test_create_manifest_and_retarget_tag(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - latest_tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repository_ref, "latest") manifest = registry_model.get_manifest_for_tag(latest_tag).get_parsed_manifest() builder = DockerSchema1ManifestBuilder("devtable", "simple", "anothertag") - builder.add_layer( - manifest.blob_digests[0], '{"id": "%s"}' % latest_tag.legacy_image.docker_image_id - ) + builder.add_layer(manifest.blob_digests[0], '{"id": "%s"}' % "someid") sample_manifest = builder.build(docker_v2_signing_key) assert sample_manifest is not None @@ -785,14 +646,14 @@ def test_create_manifest_and_retarget_tag(registry_model): def test_get_schema1_parsed_manifest(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - latest_tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repository_ref, "latest") manifest = registry_model.get_manifest_for_tag(latest_tag) assert registry_model.get_schema1_parsed_manifest(manifest, "", "", "", storage) def test_convert_manifest(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - latest_tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repository_ref, "latest") manifest = registry_model.get_manifest_for_tag(latest_tag) mediatypes = DOCKER_SCHEMA1_CONTENT_TYPES @@ -804,11 +665,11 @@ def test_convert_manifest(registry_model): def test_create_manifest_and_retarget_tag_with_labels(registry_model): repository_ref = registry_model.lookup_repository("devtable", "simple") - latest_tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repository_ref, "latest") manifest = registry_model.get_manifest_for_tag(latest_tag).get_parsed_manifest() json_metadata = { - "id": latest_tag.legacy_image.docker_image_id, + "id": "someid", "config": {"Labels": {"quay.expires-after": "2w",},}, } @@ -903,7 +764,8 @@ def test_unicode_emoji(registry_model): assert found.get_parsed_manifest().digest == manifest.digest -def test_lookup_active_repository_tags(oci_model): +@pytest.mark.parametrize("test_cached", [False, True,]) +def test_lookup_active_repository_tags(test_cached, oci_model): repository_ref = oci_model.lookup_repository("devtable", "simple") latest_tag = oci_model.get_repo_tag(repository_ref, "latest") manifest = oci_model.get_manifest_for_tag(latest_tag) @@ -924,7 +786,14 @@ def test_lookup_active_repository_tags(oci_model): tags_found = set() tag_id = None while True: - tags = oci_model.lookup_active_repository_tags(repository_ref, tag_id, 11) + if test_cached: + model_cache = InMemoryDataModelCache() + tags = oci_model.lookup_cached_active_repository_tags( + model_cache, repository_ref, tag_id, 11 + ) + else: + tags = oci_model.lookup_active_repository_tags(repository_ref, tag_id, 11) + assert len(tags) <= 11 for tag in tags[0:10]: assert tag.name not in tags_found @@ -942,49 +811,27 @@ def 
test_lookup_active_repository_tags(oci_model): assert not tags_expected -def test_yield_tags_for_vulnerability_notification(registry_model): - repository_ref = registry_model.lookup_repository("devtable", "complex") - - # Check for all legacy images under the tags and ensure not raised because - # no notification is yet registered. - for tag in registry_model.list_all_active_repository_tags( - repository_ref, include_legacy_images=True - ): - image = registry_model.get_legacy_image( - repository_ref, tag.legacy_image.docker_image_id, include_blob=True - ) - pairs = [(image.docker_image_id, image.blob.uuid)] - results = list(registry_model.yield_tags_for_vulnerability_notification(pairs)) - assert not len(results) - - # Register a notification. - model.notification.create_repo_notification( - repository_ref.id, "vulnerability_found", "email", {}, {} +def test_create_manifest_with_temp_tag(initialized_db, registry_model): + builder = DockerSchema1ManifestBuilder("devtable", "simple", "latest") + builder.add_layer( + "sha256:abcde", json.dumps({"id": "someid", "author": "some user",}, ensure_ascii=False) ) - # Check again. - for tag in registry_model.list_all_active_repository_tags( - repository_ref, include_legacy_images=True - ): - image = registry_model.get_legacy_image( - repository_ref, - tag.legacy_image.docker_image_id, - include_blob=True, - include_parents=True, - ) + manifest = builder.build(ensure_ascii=False) - # Check for every parent of the image. - for current in image.parents: - img = registry_model.get_legacy_image( - repository_ref, current.docker_image_id, include_blob=True - ) - pairs = [(img.docker_image_id, img.blob.uuid)] - results = list(registry_model.yield_tags_for_vulnerability_notification(pairs)) - assert len(results) > 0 - assert tag.name in {t.name for t in results} + for blob_digest in manifest.local_blob_digests: + _populate_blob(blob_digest) - # Check for the image itself. - pairs = [(image.docker_image_id, image.blob.uuid)] - results = list(registry_model.yield_tags_for_vulnerability_notification(pairs)) - assert len(results) > 0 - assert tag.name in {t.name for t in results} + # Create the manifest in the database. + repository_ref = registry_model.lookup_repository("devtable", "simple") + created = registry_model.create_manifest_with_temp_tag(repository_ref, manifest, 300, storage) + assert created.digest == manifest.digest + + # Ensure it cannot be found normally, since it is simply temp-tagged. + assert registry_model.lookup_manifest_by_digest(repository_ref, manifest.digest) is None + + # Ensure it can be found, which means it is temp-tagged. + found = registry_model.lookup_manifest_by_digest( + repository_ref, manifest.digest, allow_dead=True + ) + assert found is not None diff --git a/data/registry_model/test/test_manifestbuilder.py b/data/registry_model/test/test_manifestbuilder.py index b5a4ffa8b..8a65ce3a6 100644 --- a/data/registry_model/test/test_manifestbuilder.py +++ b/data/registry_model/test/test_manifestbuilder.py @@ -82,10 +82,9 @@ def test_build_manifest(layers, fake_session, registry_model): builder.done() # Verify the legacy image for the tag. - found = registry_model.get_repo_tag(repository_ref, "somenewtag", include_legacy_image=True) + found = registry_model.get_repo_tag(repository_ref, "somenewtag") assert found assert found.name == "somenewtag" - assert found.legacy_image.docker_image_id == layers[-1][0] # Verify the blob and manifest. 
manifest = registry_model.get_manifest_for_tag(found) diff --git a/data/registry_model/test/test_model_shared.py b/data/registry_model/test/test_model_shared.py new file mode 100644 index 000000000..fbaeda372 --- /dev/null +++ b/data/registry_model/test/test_model_shared.py @@ -0,0 +1,19 @@ +import pytest + +from data.registry_model.shared import SyntheticIDHandler + + +@pytest.mark.parametrize("manifest_id", [1, 1000, 10000, 60000]) +@pytest.mark.parametrize("hash_salt", [None, "", "testing1234", "foobarbaz",]) +def test_handler(manifest_id, hash_salt): + handler = SyntheticIDHandler(hash_salt) + for index in range(0, 10): + assert handler.decode(handler.encode(manifest_id, layer_index=index)) == ( + manifest_id, + index, + ) + + +def test_invalid_value(): + handler = SyntheticIDHandler("somehash") + assert handler.decode("invalidvalue") == () diff --git a/data/secscan_model/__init__.py b/data/secscan_model/__init__.py index a5509ecf1..597896135 100644 --- a/data/secscan_model/__init__.py +++ b/data/secscan_model/__init__.py @@ -3,8 +3,13 @@ import logging from collections import namedtuple from data.secscan_model.secscan_v2_model import V2SecurityScanner, NoopV2SecurityScanner -from data.secscan_model.secscan_v4_model import V4SecurityScanner, NoopV4SecurityScanner +from data.secscan_model.secscan_v4_model import ( + V4SecurityScanner, + NoopV4SecurityScanner, + ScanToken as V4ScanToken, +) from data.secscan_model.interface import SecurityScannerInterface, InvalidConfigurationException +from data.secscan_model.datatypes import SecurityInformationLookupResult, ScanLookupStatus from data.database import Manifest from data.registry_model.datatypes import Manifest as ManifestDataType @@ -12,68 +17,52 @@ from data.registry_model.datatypes import Manifest as ManifestDataType logger = logging.getLogger(__name__) -SplitScanToken = namedtuple("NextScanToken", ["version", "token"]) - - class SecurityScannerModelProxy(SecurityScannerInterface): def configure(self, app, instance_keys, storage): - # TODO(alecmerdler): Just use `V4SecurityScanner` once Clair V2 is removed. 
try: - self._model = V2SecurityScanner(app, instance_keys, storage) + self._model = V4SecurityScanner(app, instance_keys, storage) except InvalidConfigurationException: - self._model = NoopV2SecurityScanner() + self._model = NoopV4SecurityScanner() try: - self._v4_model = V4SecurityScanner(app, instance_keys, storage) + self._legacy_model = V2SecurityScanner(app, instance_keys, storage) except InvalidConfigurationException: - self._v4_model = NoopV4SecurityScanner() - - self._v4_namespace_whitelist = app.config.get("SECURITY_SCANNER_V4_NAMESPACE_WHITELIST", []) + self._legacy_model = NoopV2SecurityScanner() logger.info("===============================") - logger.info("Using split secscan model: `%s`", [self._model, self._v4_model]) - logger.info("v4 whitelist `%s`", self._v4_namespace_whitelist) + logger.info("Using split secscan model: `%s`", [self._legacy_model, self._model]) logger.info("===============================") return self def perform_indexing(self, next_token=None): - if next_token is None: - return SplitScanToken("v4", self._v4_model.perform_indexing(None)) + if next_token is not None: + assert isinstance(next_token, V4ScanToken) + assert isinstance(next_token.min_id, int) - if next_token.version == "v4" and next_token.token is not None: - return SplitScanToken("v4", self._v4_model.perform_indexing(next_token.token)) - - if next_token.version == "v4" and next_token.token is None: - return SplitScanToken("v2", self._model.perform_indexing(None)) - - if next_token.version == "v2" and next_token.token is not None: - return SplitScanToken("v2", self._model.perform_indexing(next_token.token)) - - if next_token.version == "v2" and next_token.token is None: - return None + return self._model.perform_indexing(next_token) def load_security_information(self, manifest_or_legacy_image, include_vulnerabilities): - if isinstance(manifest_or_legacy_image, ManifestDataType): - namespace = Manifest.get( - manifest_or_legacy_image._db_id - ).repository.namespace_user.username + manifest = manifest_or_legacy_image.as_manifest() - if namespace in self._v4_namespace_whitelist: - return self._v4_model.load_security_information( - manifest_or_legacy_image, include_vulnerabilities - ) + info = self._model.load_security_information(manifest, include_vulnerabilities) + if info.status != ScanLookupStatus.NOT_YET_INDEXED: + return info - return self._model.load_security_information( + legacy_info = self._legacy_model.load_security_information( manifest_or_legacy_image, include_vulnerabilities ) + if legacy_info.status != ScanLookupStatus.UNSUPPORTED_FOR_INDEXING: + return legacy_info + + return SecurityInformationLookupResult.with_status(ScanLookupStatus.NOT_YET_INDEXED) def register_model_cleanup_callbacks(self, data_model_config): return self._model.register_model_cleanup_callbacks(data_model_config) @property def legacy_api_handler(self): - return self._model.legacy_api_handler + return self._legacy_model.legacy_api_handler secscan_model = SecurityScannerModelProxy() diff --git a/data/secscan_model/secscan_v2_model.py b/data/secscan_model/secscan_v2_model.py index 94ff8efa6..07c92a6bd 100644 --- a/data/secscan_model/secscan_v2_model.py +++ b/data/secscan_model/secscan_v2_model.py @@ -1,13 +1,10 @@ import logging from collections import namedtuple -from math import log10 from prometheus_client import Gauge from deprecated import deprecated -from data.database import UseThenDisconnect - from data.secscan_model.interface import SecurityScannerInterface, InvalidConfigurationException from 
data.secscan_model.datatypes import ( ScanLookupStatus, @@ -21,14 +18,6 @@ from data.secscan_model.datatypes import ( from data.registry_model import registry_model from data.registry_model.datatypes import SecurityScanStatus -from data.model.image import ( - get_images_eligible_for_scan, - get_image_pk_field, - get_max_id_for_sec_scan, - get_min_id_for_sec_scan, -) - -from util.migrate.allocator import yield_random_entries from util.config import URLSchemeAndHostname from util.secscan.api import V2SecurityConfigValidator, SecurityScannerAPI, APIRequestFailure from util.secscan.secscan_util import get_blob_download_uri_getter @@ -111,12 +100,8 @@ class V2SecurityScanner(SecurityScannerInterface): instance_keys=instance_keys, ) - # NOTE: This import is in here because otherwise this class would depend upon app. - # Its not great, but as this is intended to be legacy until its removed, its okay. - from util.secscan.analyzer import LayerAnalyzer - - self._target_version = app.config.get("SECURITY_SCANNER_ENGINE_VERSION_TARGET", 3) - self._analyzer = LayerAnalyzer(app.config, self._legacy_secscan_api) + def register_model_cleanup_callbacks(self, data_model_config): + pass @property def legacy_api_handler(self): @@ -125,12 +110,6 @@ class V2SecurityScanner(SecurityScannerInterface): """ return self._legacy_secscan_api - def register_model_cleanup_callbacks(self, data_model_config): - if self._legacy_secscan_api is not None: - data_model_config.register_image_cleanup_callback( - self._legacy_secscan_api.cleanup_layers - ) - def load_security_information(self, manifest_or_legacy_image, include_vulnerabilities=False): status = registry_model.get_security_status(manifest_or_legacy_image) if status is None: @@ -164,80 +143,13 @@ class V2SecurityScanner(SecurityScannerInterface): return SecurityInformationLookupResult.for_request_error(str(arf)) if data is None: - # If no data was found but we reached this point, then it indicates we have incorrect security - # status for the manifest or legacy image. Mark the manifest or legacy image as unindexed - # so it automatically gets re-indexed. - if self.app.config.get("REGISTRY_STATE", "normal") == "normal": - registry_model.reset_security_status(manifest_or_legacy_image) - return SecurityInformationLookupResult.with_status(ScanLookupStatus.NOT_YET_INDEXED) return SecurityInformationLookupResult.for_data(SecurityInformation.from_dict(data)) - def _candidates_to_scan(self, start_token=None): - target_version = self._target_version - - def batch_query(): - return get_images_eligible_for_scan(target_version) - - # Find the minimum ID. - min_id = None - if start_token is not None: - min_id = start_token.min_id - else: - min_id = self.app.config.get("SECURITY_SCANNER_INDEXING_MIN_ID") - if min_id is None: - min_id = get_min_id_for_sec_scan(target_version) - - # Get the ID of the last image we can analyze. Will be None if there are no images in the - # database. - max_id = get_max_id_for_sec_scan() - if max_id is None: - return (None, None) - - if min_id is None or min_id > max_id: - return (None, None) - - # 4^log10(total) gives us a scalable batch size into the billions. - batch_size = int(4 ** log10(max(10, max_id - min_id))) - - # TODO: Once we have a clean shared NamedTuple for Images, send that to the secscan analyzer - # rather than the database Image itself. 
- iterator = yield_random_entries( - batch_query, get_image_pk_field(), batch_size, max_id, min_id, - ) - - return (iterator, ScanToken(max_id + 1)) - def perform_indexing(self, start_token=None): """ Performs indexing of the next set of unindexed manifests/images. - - If start_token is given, the indexing should resume from that point. Returns a new start - index for the next iteration of indexing. The tokens returned and given are assumed to be - opaque outside of this implementation and should not be relied upon by the caller to conform - to any particular format. + NOTE: Raises `NotImplementedError` because indexing for v2 is not supported. """ - # NOTE: This import is in here because otherwise this class would depend upon app. - # Its not great, but as this is intended to be legacy until its removed, its okay. - from util.secscan.analyzer import PreemptedException - - iterator, next_token = self._candidates_to_scan(start_token) - if iterator is None: - logger.debug("Found no additional images to scan") - return None - - with UseThenDisconnect(self.app.config): - for candidate, abt, num_remaining in iterator: - try: - self._analyzer.analyze_recursively(candidate) - except PreemptedException: - logger.debug("Another worker pre-empted us for layer: %s", candidate.id) - abt.set() - except APIRequestFailure: - logger.exception("Security scanner service unavailable") - return - - unscanned_images.set(num_remaining) - - return next_token + raise NotImplementedError("Unsupported for this security scanner version") diff --git a/data/secscan_model/secscan_v4_model.py b/data/secscan_model/secscan_v4_model.py index adfa79edf..cc1d67fc1 100644 --- a/data/secscan_model/secscan_v4_model.py +++ b/data/secscan_model/secscan_v4_model.py @@ -148,19 +148,11 @@ class V4SecurityScanner(SecurityScannerInterface): ) def perform_indexing(self, start_token=None): - whitelisted_namespaces = self.app.config.get("SECURITY_SCANNER_V4_NAMESPACE_WHITELIST", []) try: indexer_state = self._secscan_api.state() except APIRequestFailure: return None - def eligible_manifests(base_query): - return ( - base_query.join(Repository) - .join(User) - .where(User.username << whitelisted_namespaces) - ) - min_id = ( start_token.min_id if start_token is not None @@ -178,16 +170,14 @@ class V4SecurityScanner(SecurityScannerInterface): # TODO(alecmerdler): Filter out any `Manifests` that are still being uploaded def not_indexed_query(): return ( - eligible_manifests(Manifest.select()) - .switch(Manifest) + Manifest.select() .join(ManifestSecurityStatus, JOIN.LEFT_OUTER) .where(ManifestSecurityStatus.id >> None) ) def index_error_query(): return ( - eligible_manifests(Manifest.select()) - .switch(Manifest) + Manifest.select() .join(ManifestSecurityStatus) .where( ManifestSecurityStatus.index_status == IndexStatus.FAILED, @@ -197,8 +187,7 @@ class V4SecurityScanner(SecurityScannerInterface): def needs_reindexing_query(indexer_hash): return ( - eligible_manifests(Manifest.select()) - .switch(Manifest) + Manifest.select() .join(ManifestSecurityStatus) .where( ManifestSecurityStatus.indexer_hash != indexer_hash, @@ -209,6 +198,7 @@ class V4SecurityScanner(SecurityScannerInterface): # 4^log10(total) gives us a scalable batch size into the billions. batch_size = int(4 ** log10(max(10, max_id - min_id))) + # TODO(alecmerdler): We want to index newer manifests first, while backfilling older manifests... 
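        # A rough illustration of how the 4^log10 batch-size heuristic above scales with the
        # id span being indexed (span = max_id - min_id); the spans are example values only:
        #   span ~10^3 -> int(4 ** 3) = 64
        #   span ~10^6 -> int(4 ** 6) = 4096
        #   span ~10^9 -> int(4 ** 9) = 262144
        # Since 4 ** log10(n) == n ** log10(4) (roughly n^0.6), batches grow sub-linearly
        # with the number of manifest rows instead of linearly.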
iterator = itertools.chain( yield_random_entries(not_indexed_query, Manifest.id, batch_size, max_id, min_id,), yield_random_entries(index_error_query, Manifest.id, batch_size, max_id, min_id,), diff --git a/data/secscan_model/test/test_secscan_interface.py b/data/secscan_model/test/test_secscan_interface.py index 212f0d0a0..cab596683 100644 --- a/data/secscan_model/test/test_secscan_interface.py +++ b/data/secscan_model/test/test_secscan_interface.py @@ -1,4 +1,5 @@ import pytest + from mock import patch, Mock from data.secscan_model.datatypes import ScanLookupStatus, SecurityInformationLookupResult @@ -8,8 +9,10 @@ from data.secscan_model.secscan_v4_model import ( IndexReportState, ScanToken as V4ScanToken, ) -from data.secscan_model import secscan_model, SplitScanToken +from data.secscan_model import secscan_model from data.registry_model import registry_model +from data.model.oci import shared +from data.database import ManifestSecurityStatus, IndexerVersion, IndexStatus, ManifestLegacyImage from test.fixtures import * @@ -17,84 +20,62 @@ from app import app, instance_keys, storage @pytest.mark.parametrize( - "repository, v4_whitelist", - [(("devtable", "complex"), []), (("devtable", "complex"), ["devtable"]),], + "indexed_v2, indexed_v4, expected_status", + [ + (False, False, ScanLookupStatus.NOT_YET_INDEXED), + (False, True, ScanLookupStatus.UNSUPPORTED_FOR_INDEXING), + (True, False, ScanLookupStatus.FAILED_TO_INDEX), + (True, True, ScanLookupStatus.UNSUPPORTED_FOR_INDEXING), + ], ) -def test_load_security_information_v2_only(repository, v4_whitelist, initialized_db): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = v4_whitelist - +def test_load_security_information(indexed_v2, indexed_v4, expected_status, initialized_db): secscan_model.configure(app, instance_keys, storage) - repo = registry_model.lookup_repository(*repository) - for tag in registry_model.list_all_active_repository_tags(repo): - manifest = registry_model.get_manifest_for_tag(tag) - assert manifest + repository_ref = registry_model.lookup_repository("devtable", "simple") + tag = registry_model.find_matching_tag(repository_ref, ["latest"]) + manifest = registry_model.get_manifest_for_tag(tag) + assert manifest - result = secscan_model.load_security_information(manifest, True) - assert isinstance(result, SecurityInformationLookupResult) - assert result.status == ScanLookupStatus.NOT_YET_INDEXED + registry_model.populate_legacy_images_for_testing(manifest, storage) + + image = shared.get_legacy_image_for_manifest(manifest._db_id) + + if indexed_v2: + image.security_indexed = False + image.security_indexed_engine = 3 + image.save() + else: + ManifestLegacyImage.delete().where( + ManifestLegacyImage.manifest == manifest._db_id + ).execute() + + if indexed_v4: + ManifestSecurityStatus.create( + manifest=manifest._db_id, + repository=repository_ref._db_id, + error_json={}, + index_status=IndexStatus.MANIFEST_UNSUPPORTED, + indexer_hash="abc", + indexer_version=IndexerVersion.V4, + metadata_json={}, + ) + + result = secscan_model.load_security_information(manifest, True) + + assert isinstance(result, SecurityInformationLookupResult) + assert result.status == expected_status @pytest.mark.parametrize( - "repository, v4_whitelist", + "next_token, expected_next_token, expected_error", [ - (("devtable", "complex"), []), - (("devtable", "complex"), ["devtable"]), - (("buynlarge", "orgrepo"), ["devtable"]), - (("buynlarge", "orgrepo"), ["devtable", "buynlarge"]), - (("buynlarge", "orgrepo"), ["devtable", "buynlarge", 
"sellnsmall"]), + (None, V4ScanToken(56), None), + (V4ScanToken(None), V4ScanToken(56), AssertionError), + (V4ScanToken(1), V4ScanToken(56), None), + (V2ScanToken(158), V4ScanToken(56), AssertionError), ], ) -def test_load_security_information(repository, v4_whitelist, initialized_db): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = v4_whitelist - app.config["SECURITY_SCANNER_V4_ENDPOINT"] = "http://clairv4:6060" - secscan_api = Mock() - - with patch("data.secscan_model.secscan_v4_model.ClairSecurityScannerAPI", secscan_api): - secscan_model.configure(app, instance_keys, storage) - - repo = registry_model.lookup_repository(*repository) - for tag in registry_model.list_all_active_repository_tags(repo): - manifest = registry_model.get_manifest_for_tag(tag) - assert manifest - - result = secscan_model.load_security_information(manifest, True) - assert isinstance(result, SecurityInformationLookupResult) - assert result.status == ScanLookupStatus.NOT_YET_INDEXED - - -@pytest.mark.parametrize( - "next_token, expected_next_token", - [ - (None, SplitScanToken("v4", None)), - (SplitScanToken("v4", V4ScanToken(1)), SplitScanToken("v4", None)), - (SplitScanToken("v4", None), SplitScanToken("v2", V2ScanToken(318))), - (SplitScanToken("v2", V2ScanToken(318)), SplitScanToken("v2", None)), - (SplitScanToken("v2", None), None), - ], -) -def test_perform_indexing_v2_only(next_token, expected_next_token, initialized_db): - def layer_analyzer(*args, **kwargs): - return Mock() - - with patch("util.secscan.analyzer.LayerAnalyzer", layer_analyzer): - secscan_model.configure(app, instance_keys, storage) - - assert secscan_model.perform_indexing(next_token) == expected_next_token - - -@pytest.mark.parametrize( - "next_token, expected_next_token", - [ - (None, SplitScanToken("v4", V4ScanToken(56))), - (SplitScanToken("v4", V4ScanToken(1)), SplitScanToken("v4", V4ScanToken(56))), - (SplitScanToken("v4", None), SplitScanToken("v2", V2ScanToken(318))), - (SplitScanToken("v2", V2ScanToken(318)), SplitScanToken("v2", None)), - (SplitScanToken("v2", None), None), - ], -) -def test_perform_indexing(next_token, expected_next_token, initialized_db): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] +def test_perform_indexing(next_token, expected_next_token, expected_error, initialized_db): app.config["SECURITY_SCANNER_V4_ENDPOINT"] = "http://clairv4:6060" def secscan_api(*args, **kwargs): @@ -104,11 +85,11 @@ def test_perform_indexing(next_token, expected_next_token, initialized_db): return api - def layer_analyzer(*args, **kwargs): - return Mock() - with patch("data.secscan_model.secscan_v4_model.ClairSecurityScannerAPI", secscan_api): - with patch("util.secscan.analyzer.LayerAnalyzer", layer_analyzer): - secscan_model.configure(app, instance_keys, storage) + secscan_model.configure(app, instance_keys, storage) + if expected_error is not None: + with pytest.raises(expected_error): + secscan_model.perform_indexing(next_token) + else: assert secscan_model.perform_indexing(next_token) == expected_next_token diff --git a/data/secscan_model/test/test_secscan_v2_model.py b/data/secscan_model/test/test_secscan_v2_model.py index c8d5376a0..a1cf13012 100644 --- a/data/secscan_model/test/test_secscan_v2_model.py +++ b/data/secscan_model/test/test_secscan_v2_model.py @@ -4,7 +4,7 @@ import pytest from data.secscan_model.datatypes import ScanLookupStatus, SecurityInformation from data.secscan_model.secscan_v2_model import V2SecurityScanner from data.registry_model import registry_model -from 
data.database import Manifest, Image +from data.database import Manifest, Image, ManifestSecurityStatus, IndexStatus, IndexerVersion from data.model.oci import shared from data.model.image import set_secscan_status @@ -15,8 +15,10 @@ from app import app, instance_keys, storage def test_load_security_information_unknown_manifest(initialized_db): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) + + registry_model.populate_legacy_images_for_testing(manifest, storage) # Delete the manifest. Manifest.get(id=manifest._db_id).delete_instance(recursive=True) @@ -30,8 +32,10 @@ def test_load_security_information_unknown_manifest(initialized_db): def test_load_security_information_failed_to_index(initialized_db): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) + + registry_model.populate_legacy_images_for_testing(manifest, storage) # Set the index status. image = shared.get_legacy_image_for_manifest(manifest._db_id) @@ -45,8 +49,10 @@ def test_load_security_information_failed_to_index(initialized_db): def test_load_security_information_queued(initialized_db): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) + + registry_model.populate_legacy_images_for_testing(manifest, storage) secscan = V2SecurityScanner(app, instance_keys, storage) assert secscan.load_security_information(manifest).status == ScanLookupStatus.NOT_YET_INDEXED @@ -87,11 +93,14 @@ def test_load_security_information_queued(initialized_db): ) def test_load_security_information_api_responses(secscan_api_response, initialized_db): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag( - tag, backfill_if_necessary=True, include_legacy_image=True - ) - set_secscan_status(Image.get(id=manifest.legacy_image._db_id), True, 3) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) + + registry_model.populate_legacy_images_for_testing(manifest, storage) + + legacy_image_row = shared.get_legacy_image_for_manifest(manifest._db_id) + assert legacy_image_row is not None + set_secscan_status(legacy_image_row, True, 3) secscan = V2SecurityScanner(app, instance_keys, storage) secscan._legacy_secscan_api = mock.Mock() @@ -110,3 +119,10 @@ def test_load_security_information_api_responses(secscan_api_response, initializ assert len(security_information.Layer.Features) == len( secscan_api_response["Layer"].get("Features", []) ) + + +def test_perform_indexing(initialized_db): + secscan = V2SecurityScanner(app, instance_keys, 
storage) + + with pytest.raises(NotImplementedError): + secscan.perform_indexing() diff --git a/data/secscan_model/test/test_secscan_v4_model.py b/data/secscan_model/test/test_secscan_v4_model.py index 91084113d..e2f09071c 100644 --- a/data/secscan_model/test/test_secscan_v4_model.py +++ b/data/secscan_model/test/test_secscan_v4_model.py @@ -33,8 +33,8 @@ def set_secscan_config(): def test_load_security_information_queued(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) secscan = V4SecurityScanner(app, instance_keys, storage) assert secscan.load_security_information(manifest).status == ScanLookupStatus.NOT_YET_INDEXED @@ -42,8 +42,8 @@ def test_load_security_information_queued(initialized_db, set_secscan_config): def test_load_security_information_failed_to_index(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) ManifestSecurityStatus.create( manifest=manifest._db_id, @@ -61,8 +61,8 @@ def test_load_security_information_failed_to_index(initialized_db, set_secscan_c def test_load_security_information_api_returns_none(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) ManifestSecurityStatus.create( manifest=manifest._db_id, @@ -83,8 +83,8 @@ def test_load_security_information_api_returns_none(initialized_db, set_secscan_ def test_load_security_information_api_request_failure(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) mss = ManifestSecurityStatus.create( manifest=manifest._db_id, @@ -106,8 +106,8 @@ def test_load_security_information_api_request_failure(initialized_db, set_secsc def test_load_security_information_success(initialized_db, set_secscan_config): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) ManifestSecurityStatus.create( manifest=manifest._db_id, @@ -140,11 +140,6 @@ def test_load_security_information_success(initialized_db, set_secscan_config): def test_perform_indexing_whitelist(initialized_db, 
set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.return_value = {"state": "abc"} @@ -155,38 +150,15 @@ def test_perform_indexing_whitelist(initialized_db, set_secscan_config): next_token = secscan.perform_indexing() - assert secscan._secscan_api.index.call_count == expected_manifests.count() - for mss in ManifestSecurityStatus.select(): - assert mss.repository.namespace_user.username == "devtable" - assert ManifestSecurityStatus.select().count() == expected_manifests.count() - assert ( - Manifest.get_by_id(next_token.min_id - 1).repository.namespace_user.username == "devtable" - ) - - -def test_perform_indexing_empty_whitelist(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = [] - secscan = V4SecurityScanner(app, instance_keys, storage) - secscan._secscan_api = mock.Mock() - secscan._secscan_api.state.return_value = {"state": "abc"} - secscan._secscan_api.index.return_value = ( - {"err": None, "state": IndexReportState.Index_Finished}, - "abc", - ) - - next_token = secscan.perform_indexing() - - assert secscan._secscan_api.index.call_count == 0 - assert ManifestSecurityStatus.select().count() == 0 assert next_token.min_id == Manifest.select(fn.Max(Manifest.id)).scalar() + 1 + assert secscan._secscan_api.index.call_count == Manifest.select().count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() + for mss in ManifestSecurityStatus.select(): + assert mss.index_status == IndexStatus.COMPLETED + def test_perform_indexing_failed(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.return_value = {"state": "abc"} @@ -195,7 +167,7 @@ def test_perform_indexing_failed(initialized_db, set_secscan_config): "abc", ) - for manifest in expected_manifests: + for manifest in Manifest.select(): ManifestSecurityStatus.create( manifest=manifest, repository=manifest.repository, @@ -210,16 +182,13 @@ def test_perform_indexing_failed(initialized_db, set_secscan_config): secscan.perform_indexing() - assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() for mss in ManifestSecurityStatus.select(): assert mss.index_status == IndexStatus.COMPLETED def test_perform_indexing_failed_within_reindex_threshold(initialized_db, set_secscan_config): app.config["SECURITY_SCANNER_V4_REINDEX_THRESHOLD"] = 300 - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() @@ -229,7 +198,7 @@ def test_perform_indexing_failed_within_reindex_threshold(initialized_db, set_se "abc", ) - for manifest in expected_manifests: + for manifest in Manifest.select(): ManifestSecurityStatus.create( manifest=manifest, repository=manifest.repository, @@ -242,17 +211,12 @@ def test_perform_indexing_failed_within_reindex_threshold(initialized_db, set_se secscan.perform_indexing() - 
assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() for mss in ManifestSecurityStatus.select(): assert mss.index_status == IndexStatus.FAILED def test_perform_indexing_needs_reindexing(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.return_value = {"state": "xyz"} @@ -261,7 +225,7 @@ def test_perform_indexing_needs_reindexing(initialized_db, set_secscan_config): "xyz", ) - for manifest in expected_manifests: + for manifest in Manifest.select(): ManifestSecurityStatus.create( manifest=manifest, repository=manifest.repository, @@ -276,7 +240,7 @@ def test_perform_indexing_needs_reindexing(initialized_db, set_secscan_config): secscan.perform_indexing() - assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() for mss in ManifestSecurityStatus.select(): assert mss.indexer_hash == "xyz" @@ -285,10 +249,6 @@ def test_perform_indexing_needs_reindexing_within_reindex_threshold( initialized_db, set_secscan_config ): app.config["SECURITY_SCANNER_V4_REINDEX_THRESHOLD"] = 300 - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select().join(Repository).join(User).where(User.username == "devtable") - ) secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() @@ -298,7 +258,7 @@ def test_perform_indexing_needs_reindexing_within_reindex_threshold( "xyz", ) - for manifest in expected_manifests: + for manifest in Manifest.select(): ManifestSecurityStatus.create( manifest=manifest, repository=manifest.repository, @@ -311,14 +271,12 @@ def test_perform_indexing_needs_reindexing_within_reindex_threshold( secscan.perform_indexing() - assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert ManifestSecurityStatus.select().count() == Manifest.select().count() for mss in ManifestSecurityStatus.select(): assert mss.indexer_hash == "abc" def test_perform_indexing_api_request_failure_state(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.side_effect = APIRequestFailure() @@ -330,14 +288,6 @@ def test_perform_indexing_api_request_failure_state(initialized_db, set_secscan_ def test_perform_indexing_api_request_failure_index(initialized_db, set_secscan_config): - app.config["SECURITY_SCANNER_V4_NAMESPACE_WHITELIST"] = ["devtable"] - expected_manifests = ( - Manifest.select(fn.Max(Manifest.id)) - .join(Repository) - .join(User) - .where(User.username == "devtable") - ) - secscan = V4SecurityScanner(app, instance_keys, storage) secscan._secscan_api = mock.Mock() secscan._secscan_api.state.return_value = {"state": "abc"} @@ -357,8 +307,8 @@ def test_perform_indexing_api_request_failure_index(initialized_db, set_secscan_ next_token = secscan.perform_indexing() - assert next_token.min_id == expected_manifests.scalar() + 1 - assert ManifestSecurityStatus.select().count() == expected_manifests.count() + assert next_token.min_id == 
Manifest.select(fn.Max(Manifest.id)).scalar() + 1 + assert ManifestSecurityStatus.select().count() == Manifest.select(fn.Max(Manifest.id)).count() def test_features_for(): diff --git a/endpoints/api/image.py b/endpoints/api/image.py index dc13aab5f..41c6f7f2d 100644 --- a/endpoints/api/image.py +++ b/endpoints/api/image.py @@ -3,6 +3,10 @@ List and lookup repository images. """ import json +from collections import defaultdict +from datetime import datetime + +from app import storage from data.registry_model import registry_model from endpoints.api import ( resource, @@ -17,7 +21,7 @@ from endpoints.api import ( from endpoints.exception import NotFound -def image_dict(image, with_history=False, with_tags=False): +def image_dict(image): parsed_command = None if image.command: try: @@ -31,19 +35,11 @@ def image_dict(image, with_history=False, with_tags=False): "comment": image.comment, "command": parsed_command, "size": image.image_size, - "uploading": image.uploading, - "sort_index": len(image.parents), + "uploading": False, + "sort_index": 0, } - if with_tags: - image_data["tags"] = [tag.name for tag in image.tags] - - if with_history: - image_data["history"] = [image_dict(parent) for parent in image.parents] - - # Calculate the ancestors string, with the DBID's replaced with the docker IDs. - parent_docker_ids = [parent_image.docker_image_id for parent_image in image.parents] - image_data["ancestors"] = "/{0}/".format("/".join(parent_docker_ids)) + image_data["ancestors"] = "/{0}/".format("/".join(image.ancestor_ids)) return image_data @@ -66,8 +62,35 @@ class RepositoryImageList(RepositoryParamResource): if repo_ref is None: raise NotFound() - images = registry_model.get_legacy_images(repo_ref) - return {"images": [image_dict(image, with_tags=True) for image in images]} + tags = registry_model.list_all_active_repository_tags(repo_ref) + images_with_tags = defaultdict(list) + for tag in tags: + legacy_image_id = tag.manifest.legacy_image_root_id + if legacy_image_id is not None: + images_with_tags[legacy_image_id].append(tag) + + # NOTE: This is replicating our older response for this endpoint, but + # returns empty for the metadata fields. This is to ensure back-compat + # for callers still using the deprecated API, while not having to load + # all the manifests from storage. + return { + "images": [ + { + "id": image_id, + "created": format_date( + datetime.utcfromtimestamp((min([tag.lifetime_start_ts for tag in tags]))) + ), + "comment": "", + "command": "", + "size": 0, + "uploading": False, + "sort_index": 0, + "tags": [tag.name for tag in tags], + "ancestors": "", + } + for image_id, tags in images_with_tags.items() + ] + } @resource("/v1/repository//image/") @@ -90,8 +113,8 @@ class RepositoryImage(RepositoryParamResource): if repo_ref is None: raise NotFound() - image = registry_model.get_legacy_image(repo_ref, image_id, include_parents=True) + image = registry_model.get_legacy_image(repo_ref, image_id, storage) if image is None: raise NotFound() - return image_dict(image, with_history=True) + return image_dict(image) diff --git a/endpoints/api/manifest.py b/endpoints/api/manifest.py index 9baf8a7a7..502885c5b 100644 --- a/endpoints/api/manifest.py +++ b/endpoints/api/manifest.py @@ -4,6 +4,7 @@ Manage the manifests of a repository. 
 import json
 import logging
 
+from datetime import datetime
 from flask import request
 
 from app import label_validator, storage
@@ -74,10 +75,6 @@ def _layer_dict(manifest_layer, index):
 
 
 def _manifest_dict(manifest):
-    image = None
-    if manifest.legacy_image_if_present is not None:
-        image = image_dict(manifest.legacy_image, with_history=True)
-
     layers = None
     if not manifest.is_manifest_list:
         layers = registry_model.list_manifest_layers(manifest, storage)
@@ -85,14 +82,30 @@
             logger.debug("Missing layers for manifest `%s`", manifest.digest)
             abort(404)
 
+    image = None
+    if manifest.legacy_image_root_id:
+        # NOTE: This is replicating our older response for this endpoint, but
+        # returns empty for the metadata fields. This is to ensure back-compat
+        # for callers still using the deprecated API.
+        image = {
+            "id": manifest.legacy_image_root_id,
+            "created": format_date(datetime.utcnow()),
+            "comment": "",
+            "command": "",
+            "size": 0,
+            "uploading": False,
+            "sort_index": 0,
+            "ancestors": "",
+        }
+
     return {
         "digest": manifest.digest,
         "is_manifest_list": manifest.is_manifest_list,
         "manifest_data": manifest.internal_manifest_bytes.as_unicode(),
-        "image": image,
         "layers": (
             [_layer_dict(lyr.layer_info, idx) for idx, lyr in enumerate(layers)] if layers else None
         ),
+        "image": image,
     }
@@ -112,9 +125,7 @@ class RepositoryManifest(RepositoryParamResource):
         if repo_ref is None:
             raise NotFound()
 
-        manifest = registry_model.lookup_manifest_by_digest(
-            repo_ref, manifestref, include_legacy_image=True
-        )
+        manifest = registry_model.lookup_manifest_by_digest(repo_ref, manifestref)
         if manifest is None:
             raise NotFound()
diff --git a/endpoints/api/repository_models_pre_oci.py b/endpoints/api/repository_models_pre_oci.py
index 45773c051..6cc05e0f7 100644
--- a/endpoints/api/repository_models_pre_oci.py
+++ b/endpoints/api/repository_models_pre_oci.py
@@ -161,7 +161,7 @@ class PreOCIModel(RepositoryDataInterface):
                 repo.namespace_user.username,
                 repo.name,
                 repo.rid in star_set,
-                repo.visibility_id == model.repository.get_public_repo_visibility().id,
+                model.repository.is_repository_public(repo),
                 repo_kind,
                 repo.description,
                 repo.namespace_user.organization,
@@ -257,8 +257,8 @@ class PreOCIModel(RepositoryDataInterface):
         tags = [
             Tag(
                 tag.name,
-                tag.legacy_image.docker_image_id if tag.legacy_image_if_present else None,
-                tag.legacy_image.aggregate_size if tag.legacy_image_if_present else None,
+                tag.manifest.legacy_image_root_id,
+                tag.manifest_layers_size,
                 tag.lifetime_start_ts,
                 tag.manifest_digest,
                 tag.lifetime_end_ts,
diff --git a/endpoints/api/robot_models_pre_oci.py b/endpoints/api/robot_models_pre_oci.py
index 54e9a4dea..4c010633e 100644
--- a/endpoints/api/robot_models_pre_oci.py
+++ b/endpoints/api/robot_models_pre_oci.py
@@ -25,7 +25,7 @@ class RobotPreOCIModel(RobotInterface):
             return [
                 Permission(
                     permission.repository.name,
-                    permission.repository.visibility.name,
+                    model.repository.repository_visibility_name(permission.repository),
                     permission.role.name,
                 )
                 for permission in permissions
diff --git a/endpoints/api/secscan.py b/endpoints/api/secscan.py
index a5ec05943..c459c2cdd 100644
--- a/endpoints/api/secscan.py
+++ b/endpoints/api/secscan.py
@@ -7,6 +7,7 @@ import features
 
 from enum import Enum, unique
 
+from app import storage
 from auth.decorators import process_basic_auth_no_pass
 from data.registry_model import registry_model
 from data.secscan_model import secscan_model
@@ -101,7 +102,7 @@ class RepositoryImageSecurity(RepositoryParamResource):
         if repo_ref is None:
raise NotFound() - legacy_image = registry_model.get_legacy_image(repo_ref, imageid) + legacy_image = registry_model.get_legacy_image(repo_ref, imageid, storage) if legacy_image is None: raise NotFound() diff --git a/endpoints/api/tag.py b/endpoints/api/tag.py index cb6da0f30..314ab6f4b 100644 --- a/endpoints/api/tag.py +++ b/endpoints/api/tag.py @@ -9,6 +9,7 @@ from auth.auth_context import get_authenticated_user from data.registry_model import registry_model from endpoints.api import ( resource, + deprecated, nickname, require_repo_read, require_repo_write, @@ -40,18 +41,11 @@ def _tag_dict(tag): if tag.lifetime_end_ts and tag.lifetime_end_ts > 0: tag_info["end_ts"] = tag.lifetime_end_ts - # TODO: Remove this once fully on OCI data model. - if tag.legacy_image_if_present: - tag_info["docker_image_id"] = tag.legacy_image.docker_image_id - tag_info["image_id"] = tag.legacy_image.docker_image_id - tag_info["size"] = tag.legacy_image.aggregate_size - - # TODO: Remove this check once fully on OCI data model. - if tag.manifest_digest: - tag_info["manifest_digest"] = tag.manifest_digest - - if tag.manifest: - tag_info["is_manifest_list"] = tag.manifest.is_manifest_list + tag_info["manifest_digest"] = tag.manifest_digest + tag_info["is_manifest_list"] = tag.manifest.is_manifest_list + tag_info["size"] = tag.manifest_layers_size + tag_info["docker_image_id"] = tag.manifest.legacy_image_root_id + tag_info["image_id"] = tag.manifest.legacy_image_root_id if tag.lifetime_start_ts and tag.lifetime_start_ts > 0: last_modified = format_date(datetime.utcfromtimestamp(tag.lifetime_start_ts)) @@ -188,7 +182,7 @@ class RepositoryTag(RepositoryParamResource): raise InvalidRequest("Could not update tag expiration; Tag has probably changed") if "image" in request.get_json() or "manifest_digest" in request.get_json(): - existing_tag = registry_model.get_repo_tag(repo_ref, tag, include_legacy_image=True) + existing_tag = registry_model.get_repo_tag(repo_ref, tag) manifest_or_image = None image_id = None @@ -201,7 +195,7 @@ class RepositoryTag(RepositoryParamResource): ) else: image_id = request.get_json()["image"] - manifest_or_image = registry_model.get_legacy_image(repo_ref, image_id) + manifest_or_image = registry_model.get_legacy_image(repo_ref, image_id, storage) if manifest_or_image is None: raise NotFound() @@ -272,6 +266,7 @@ class RepositoryTagImages(RepositoryParamResource): @nickname("listTagImages") @disallow_for_app_repositories @parse_args() + @deprecated() @query_param( "owned", "If specified, only images wholely owned by this tag are returned.", @@ -286,30 +281,42 @@ class RepositoryTagImages(RepositoryParamResource): if repo_ref is None: raise NotFound() - tag_ref = registry_model.get_repo_tag(repo_ref, tag, include_legacy_image=True) + tag_ref = registry_model.get_repo_tag(repo_ref, tag) if tag_ref is None: raise NotFound() - if tag_ref.legacy_image_if_present is None: + if parsed_args["owned"]: + # NOTE: This is deprecated, so we just return empty now. return {"images": []} - image_id = tag_ref.legacy_image.docker_image_id + manifest = registry_model.get_manifest_for_tag(tag_ref) + if manifest is None: + raise NotFound() - all_images = None - if parsed_args["owned"]: - # TODO: Remove the `owned` image concept once we are fully on V2_2. 
- all_images = registry_model.get_legacy_images_owned_by_tag(tag_ref) - else: - image_with_parents = registry_model.get_legacy_image( - repo_ref, image_id, include_parents=True - ) - if image_with_parents is None: - raise NotFound() - - all_images = [image_with_parents] + image_with_parents.parents + legacy_image = registry_model.get_legacy_image( + repo_ref, manifest.legacy_image_root_id, storage + ) + if legacy_image is None: + raise NotFound() + # NOTE: This is replicating our older response for this endpoint, but + # returns empty for the metadata fields. This is to ensure back-compat + # for callers still using the deprecated API, while not having to load + # all the manifests from storage. return { - "images": [image_dict(image) for image in all_images], + "images": [ + { + "id": image_id, + "created": format_date(datetime.utcfromtimestamp(tag_ref.lifetime_start_ts)), + "comment": "", + "command": "", + "size": 0, + "uploading": False, + "sort_index": 0, + "ancestors": "", + } + for image_id in legacy_image.full_image_id_chain + ] } @@ -374,7 +381,7 @@ class RestoreTag(RepositoryParamResource): repo_ref, manifest_digest, allow_dead=True, require_available=True ) elif image_id is not None: - manifest_or_legacy_image = registry_model.get_legacy_image(repo_ref, image_id) + manifest_or_legacy_image = registry_model.get_legacy_image(repo_ref, image_id, storage) if manifest_or_legacy_image is None: raise NotFound() diff --git a/endpoints/api/team.py b/endpoints/api/team.py index c2ca6e960..44167842c 100644 --- a/endpoints/api/team.py +++ b/endpoints/api/team.py @@ -49,7 +49,7 @@ def permission_view(permission): return { "repository": { "name": permission.repository.name, - "is_public": permission.repository.visibility.name == "public", + "is_public": model.repository.is_repository_public(permission.repository), }, "role": permission.role.name, } diff --git a/endpoints/api/test/test_deprecated_route.py b/endpoints/api/test/test_deprecated_route.py index c290ba5f1..361433021 100644 --- a/endpoints/api/test/test_deprecated_route.py +++ b/endpoints/api/test/test_deprecated_route.py @@ -11,16 +11,15 @@ from test.fixtures import * def test_deprecated_route(client): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) - image = shared.get_legacy_image_for_manifest(manifest._db_id) + tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) with client_with_identity("devtable", client) as cl: resp = conduct_api_call( cl, RepositoryImageSecurity, "get", - {"repository": "devtable/simple", "imageid": image.docker_image_id}, + {"repository": "devtable/simple", "imageid": manifest.legacy_image_root_id}, expected_code=200, ) diff --git a/endpoints/api/test/test_secscan.py b/endpoints/api/test/test_secscan.py index 561a17722..b88633399 100644 --- a/endpoints/api/test/test_secscan.py +++ b/endpoints/api/test/test_secscan.py @@ -13,12 +13,12 @@ from test.fixtures import * @pytest.mark.parametrize("endpoint", [RepositoryImageSecurity, RepositoryManifestSecurity,]) def test_get_security_info_with_pull_secret(endpoint, client): repository_ref = registry_model.lookup_repository("devtable", "simple") - tag = registry_model.get_repo_tag(repository_ref, "latest", include_legacy_image=True) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) 
+ tag = registry_model.get_repo_tag(repository_ref, "latest") + manifest = registry_model.get_manifest_for_tag(tag) params = { "repository": "devtable/simple", - "imageid": tag.legacy_image.docker_image_id, + "imageid": tag.manifest.legacy_image_root_id, "manifestref": manifest.digest, } diff --git a/endpoints/api/test/test_tag.py b/endpoints/api/test/test_tag.py index b03550665..7afc04d28 100644 --- a/endpoints/api/test/test_tag.py +++ b/endpoints/api/test/test_tag.py @@ -69,10 +69,10 @@ def test_move_tag(image_exists, test_tag, expected_status, client, app): test_image = "unknown" if image_exists: repo_ref = registry_model.lookup_repository("devtable", "simple") - tag_ref = registry_model.get_repo_tag(repo_ref, "latest", include_legacy_image=True) + tag_ref = registry_model.get_repo_tag(repo_ref, "latest") assert tag_ref - test_image = tag_ref.legacy_image.docker_image_id + test_image = tag_ref.manifest.legacy_image_root_id params = {"repository": "devtable/simple", "tag": test_tag} request_body = {"image": test_image} @@ -86,12 +86,12 @@ def test_move_tag(image_exists, test_tag, expected_status, client, app): @pytest.mark.parametrize( "repo_namespace, repo_name, query_count", [ - ("devtable", "simple", 5), - ("devtable", "history", 5), - ("devtable", "complex", 5), - ("devtable", "gargantuan", 5), - ("buynlarge", "orgrepo", 7), # +2 for permissions checks. - ("buynlarge", "anotherorgrepo", 7), # +2 for permissions checks. + ("devtable", "simple", 4), + ("devtable", "history", 4), + ("devtable", "complex", 4), + ("devtable", "gargantuan", 4), + ("buynlarge", "orgrepo", 6), # +2 for permissions checks. + ("buynlarge", "anotherorgrepo", 6), # +2 for permissions checks. ], ) def test_list_repo_tags(repo_namespace, repo_name, client, query_count, app): @@ -109,18 +109,15 @@ def test_list_repo_tags(repo_namespace, repo_name, client, query_count, app): @pytest.mark.parametrize( - "repository, tag, owned, expect_images", + "repository, tag, expect_images", [ - ("devtable/simple", "prod", False, True), - ("devtable/simple", "prod", True, False), - ("devtable/simple", "latest", False, True), - ("devtable/simple", "latest", True, False), - ("devtable/complex", "prod", False, True), - ("devtable/complex", "prod", True, True), + ("devtable/simple", "prod", True), + ("devtable/simple", "latest", True), + ("devtable/complex", "prod", True), ], ) -def test_list_tag_images(repository, tag, owned, expect_images, client, app): +def test_list_tag_images(repository, tag, expect_images, client, app): with client_with_identity("devtable", client) as cl: - params = {"repository": repository, "tag": tag, "owned": owned} + params = {"repository": repository, "tag": tag} result = conduct_api_call(cl, RepositoryTagImages, "get", params, None, 200).json assert bool(result["images"]) == expect_images diff --git a/endpoints/api/user.py b/endpoints/api/user.py index 064921109..8cb9e0853 100644 --- a/endpoints/api/user.py +++ b/endpoints/api/user.py @@ -1087,7 +1087,7 @@ class StarredRepositoryList(ApiResource): "namespace": repo_obj.namespace_user.username, "name": repo_obj.name, "description": repo_obj.description, - "is_public": repo_obj.visibility.name == "public", + "is_public": model.repository.is_repository_public(repo_obj), } return {"repositories": [repo_view(repo) for repo in repos]}, next_page_token diff --git a/endpoints/appr/models_cnr.py b/endpoints/appr/models_cnr.py index ff077f6e5..c49a4c768 100644 --- a/endpoints/appr/models_cnr.py +++ b/endpoints/appr/models_cnr.py @@ -10,6 +10,7 @@ import data.model 
from app import app, storage, authentication, model_cache from data import appr_model +from data import model as data_model from data.cache import cache_key from data.database import Repository, MediaType, db_transaction from data.appr_model.models import NEW_MODELS @@ -173,7 +174,7 @@ class CNRAppModel(AppRegistryDataInterface): view = ApplicationSummaryView( namespace=repo.namespace_user.username, name=app_name, - visibility=repo.visibility.name, + visibility=data_model.repository.repository_visibility_name(repo), default=available_releases[0], channels=channels, manifests=manifests, diff --git a/endpoints/secscan.py b/endpoints/secscan.py index 395949b89..057b6d82c 100644 --- a/endpoints/secscan.py +++ b/endpoints/secscan.py @@ -1,33 +1,12 @@ import logging -import json -import features - -from app import secscan_notification_queue -from flask import request, make_response, Blueprint, abort -from endpoints.decorators import route_show_if, anon_allowed +from flask import make_response, Blueprint +from endpoints.decorators import anon_allowed logger = logging.getLogger(__name__) secscan = Blueprint("secscan", __name__) -@route_show_if(features.SECURITY_SCANNER) -@secscan.route("/notify", methods=["POST"]) -def secscan_notification(): - data = request.get_json() - logger.debug("Got notification from Security Scanner: %s", data) - if "Notification" not in data: - abort(400) - - notification = data["Notification"] - name = ["named", notification["Name"]] - - if not secscan_notification_queue.alive(name): - secscan_notification_queue.put(name, json.dumps(notification)) - - return make_response("Okay") - - @secscan.route("/_internal_ping") @anon_allowed def internal_ping(): diff --git a/endpoints/test/test_anon_checked.py b/endpoints/test/test_anon_checked.py index 7595a2b92..95c51085c 100644 --- a/endpoints/test/test_anon_checked.py +++ b/endpoints/test/test_anon_checked.py @@ -3,10 +3,9 @@ import pytest from app import app from endpoints.v1 import v1_bp from endpoints.v2 import v2_bp -from endpoints.verbs import verbs -@pytest.mark.parametrize("blueprint", [v2_bp, v1_bp, verbs,]) +@pytest.mark.parametrize("blueprint", [v2_bp, v1_bp,]) def test_verify_blueprint(blueprint): class Checker(object): def __init__(self): diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index ae94642d5..e38ecc9df 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -40,18 +40,7 @@ def require_completion(f): @wraps(f) def wrapper(namespace, repository, *args, **kwargs): - image_id = kwargs["image_id"] - repository_ref = registry_model.lookup_repository(namespace, repository) - if repository_ref is not None: - legacy_image = registry_model.get_legacy_image(repository_ref, image_id) - if legacy_image is not None and legacy_image.uploading: - abort( - 400, - "Image %(image_id)s is being uploaded, retry later", - issue="upload-in-progress", - image_id=image_id, - ) - + # TODO: Remove this return f(namespace, repository, *args, **kwargs) return wrapper @@ -102,7 +91,9 @@ def head_image_layer(namespace, repository, image_id, headers): abort(404) logger.debug("Looking up placement locations") - legacy_image = registry_model.get_legacy_image(repository_ref, image_id, include_blob=True) + legacy_image = registry_model.get_legacy_image( + repository_ref, image_id, store, include_blob=True + ) if legacy_image is None: logger.debug("Could not find any blob placement locations") abort(404, "Image %(image_id)s not found", issue="unknown-image", image_id=image_id) @@ -139,7 +130,9 @@ def 
get_image_layer(namespace, repository, image_id, headers): if repository_ref is None: abort(404) - legacy_image = registry_model.get_legacy_image(repository_ref, image_id, include_blob=True) + legacy_image = registry_model.get_legacy_image( + repository_ref, image_id, store, include_blob=True + ) if legacy_image is None: abort(404, "Image %(image_id)s not found", issue="unknown-image", image_id=image_id) @@ -351,7 +344,9 @@ def get_image_json(namespace, repository, image_id, headers): abort(403) logger.debug("Looking up repo image") - legacy_image = registry_model.get_legacy_image(repository_ref, image_id, include_blob=True) + legacy_image = registry_model.get_legacy_image( + repository_ref, image_id, store, include_blob=True + ) if legacy_image is None: flask_abort(404) @@ -381,15 +376,12 @@ def get_image_ancestry(namespace, repository, image_id, headers): abort(403) logger.debug("Looking up repo image") - legacy_image = registry_model.get_legacy_image(repository_ref, image_id, include_parents=True) + legacy_image = registry_model.get_legacy_image(repository_ref, image_id, store) if legacy_image is None: abort(404, "Image %(image_id)s not found", issue="unknown-image", image_id=image_id) # NOTE: We can not use jsonify here because we are returning a list not an object. - ancestor_ids = [legacy_image.docker_image_id] + [ - a.docker_image_id for a in legacy_image.parents - ] - response = make_response(json.dumps(ancestor_ids), 200) + response = make_response(json.dumps(legacy_image.full_image_id_chain), 200) response.headers.extend(headers) return response diff --git a/endpoints/v1/tag.py b/endpoints/v1/tag.py index d7cf3f5c6..c2900fb41 100644 --- a/endpoints/v1/tag.py +++ b/endpoints/v1/tag.py @@ -98,7 +98,7 @@ def put_tag(namespace_name, repo_name, tag): # Check if there is an existing image we should use (for PUT calls outside of a normal push # operation). - legacy_image = registry_model.get_legacy_image(repository_ref, image_id) + legacy_image = registry_model.get_legacy_image(repository_ref, image_id, storage) if legacy_image is None: abort(400) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 8428a6297..d1f2c987f 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -68,7 +68,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): image_pulls.labels("v2", "tag", 404).inc() raise ManifestUnknown() - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + manifest = registry_model.get_manifest_for_tag(tag) if manifest is None: # Something went wrong. image_pulls.labels("v2", "tag", 400).inc() diff --git a/endpoints/v2/test/test_blob.py b/endpoints/v2/test/test_blob.py index 3d71a407f..d87921b00 100644 --- a/endpoints/v2/test/test_blob.py +++ b/endpoints/v2/test/test_blob.py @@ -129,12 +129,13 @@ def test_blob_mounting(mount_digest, source_repo, username, expect_success, clie headers=headers, ) + repository = model.repository.get_repository("devtable", "building") + if expect_success: # Ensure the blob now exists under the repo. 
- model.blob.get_repo_blob_by_digest("devtable", "building", mount_digest) + assert model.oci.blob.get_repository_blob_by_digest(repository, mount_digest) else: - with pytest.raises(model.blob.BlobDoesNotExist): - model.blob.get_repo_blob_by_digest("devtable", "building", mount_digest) + assert model.oci.blob.get_repository_blob_by_digest(repository, mount_digest) is None def test_blob_upload_offset(client, app): diff --git a/endpoints/v2/test/test_manifest_cornercases.py b/endpoints/v2/test/test_manifest_cornercases.py index 0037292cf..38d61e1d4 100644 --- a/endpoints/v2/test/test_manifest_cornercases.py +++ b/endpoints/v2/test/test_manifest_cornercases.py @@ -31,6 +31,23 @@ def _perform_cleanup(): model.gc.garbage_collect_repo(repo_object) +def _get_legacy_image_row_id(tag): + return ( + database.ManifestLegacyImage.select(database.ManifestLegacyImage, database.Image) + .join(database.Image) + .where(database.ManifestLegacyImage.manifest == tag.manifest._db_id) + .get() + .image.docker_image_id + ) + + +def _add_legacy_image(namespace, repo_name, tag_name): + repo_ref = registry_model.lookup_repository(namespace, repo_name) + tag_ref = registry_model.get_repo_tag(repo_ref, tag_name) + manifest_ref = registry_model.get_manifest_for_tag(tag_ref) + registry_model.populate_legacy_images_for_testing(manifest_ref, storage) + + def test_missing_link(initialized_db): """ Tests for a corner case that could result in missing a link to a blob referenced by a manifest. @@ -54,6 +71,8 @@ def test_missing_link(initialized_db): that of `SECOND_ID`, leaving `THIRD_ID` unlinked and therefore, after a GC, missing `FOURTH_BLOB`. """ + # TODO: Remove this test once we stop writing legacy image rows. + with set_tag_expiration_policy("devtable", 0): location_name = storage.preferred_locations[0] location = database.ImageStorageLocation.get(name=location_name) @@ -72,21 +91,19 @@ def test_missing_link(initialized_db): ) _write_manifest(ADMIN_ACCESS_USER, REPO, FIRST_TAG, first_manifest) + _add_legacy_image(ADMIN_ACCESS_USER, REPO, FIRST_TAG) # Delete all temp tags and perform GC. _perform_cleanup() # Ensure that the first blob still exists, along with the first tag. - assert ( - model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, first_blob_sha) is not None - ) + repo = model.repository.get_repository(ADMIN_ACCESS_USER, REPO) + assert model.oci.blob.get_repository_blob_by_digest(repo, first_blob_sha) is not None repository_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, REPO) - found_tag = registry_model.get_repo_tag( - repository_ref, FIRST_TAG, include_legacy_image=True - ) + found_tag = registry_model.get_repo_tag(repository_ref, FIRST_TAG) assert found_tag is not None - assert found_tag.legacy_image.docker_image_id == "first" + assert _get_legacy_image_row_id(found_tag) == "first" # Create the second and third blobs. second_blob_sha = "sha256:" + hashlib.sha256(b"SECOND").hexdigest() @@ -108,6 +125,7 @@ def test_missing_link(initialized_db): ) _write_manifest(ADMIN_ACCESS_USER, REPO, SECOND_TAG, second_manifest) + _add_legacy_image(ADMIN_ACCESS_USER, REPO, SECOND_TAG) # Delete all temp tags and perform GC. 
_perform_cleanup() @@ -117,18 +135,14 @@ def test_missing_link(initialized_db): assert registry_model.get_repo_blob_by_digest(repository_ref, second_blob_sha) is not None assert registry_model.get_repo_blob_by_digest(repository_ref, third_blob_sha) is not None - found_tag = registry_model.get_repo_tag( - repository_ref, FIRST_TAG, include_legacy_image=True - ) + found_tag = registry_model.get_repo_tag(repository_ref, FIRST_TAG) assert found_tag is not None - assert found_tag.legacy_image.docker_image_id == "first" + assert _get_legacy_image_row_id(found_tag) == "first" # Ensure the IDs have changed. - found_tag = registry_model.get_repo_tag( - repository_ref, SECOND_TAG, include_legacy_image=True - ) + found_tag = registry_model.get_repo_tag(repository_ref, SECOND_TAG) assert found_tag is not None - assert found_tag.legacy_image.docker_image_id != "second" + assert _get_legacy_image_row_id(found_tag) != "second" # Create the fourth blob. fourth_blob_sha = "sha256:" + hashlib.sha256(b"FOURTH").hexdigest() @@ -147,6 +161,7 @@ def test_missing_link(initialized_db): ) _write_manifest(ADMIN_ACCESS_USER, REPO, THIRD_TAG, third_manifest) + _add_legacy_image(ADMIN_ACCESS_USER, REPO, THIRD_TAG) # Delete all temp tags and perform GC. _perform_cleanup() @@ -158,10 +173,6 @@ def test_missing_link(initialized_db): assert registry_model.get_repo_blob_by_digest(repository_ref, fourth_blob_sha) is not None # Ensure new synthesized IDs were created. - second_tag = registry_model.get_repo_tag( - repository_ref, SECOND_TAG, include_legacy_image=True - ) - third_tag = registry_model.get_repo_tag( - repository_ref, THIRD_TAG, include_legacy_image=True - ) - assert second_tag.legacy_image.docker_image_id != third_tag.legacy_image.docker_image_id + second_tag = registry_model.get_repo_tag(repository_ref, SECOND_TAG) + third_tag = registry_model.get_repo_tag(repository_ref, THIRD_TAG) + assert _get_legacy_image_row_id(second_tag) != _get_legacy_image_row_id(third_tag) diff --git a/endpoints/verbs/__init__.py b/endpoints/verbs/__init__.py deleted file mode 100644 index e2bb25f7c..000000000 --- a/endpoints/verbs/__init__.py +++ /dev/null @@ -1,535 +0,0 @@ -import hashlib -import json -import logging -import uuid - -from functools import wraps - -from flask import redirect, Blueprint, abort, send_file, make_response, request -from prometheus_client import Counter - -import features - -from app import app, signer, storage, config_provider, ip_resolver, instance_keys -from auth.auth_context import get_authenticated_user -from auth.decorators import process_auth -from auth.permissions import ReadRepositoryPermission -from data import database -from data import model -from data.registry_model import registry_model -from endpoints.decorators import ( - anon_protect, - anon_allowed, - route_show_if, - parse_repository_name, - check_region_blacklisted, -) -from endpoints.metrics import image_pulls, image_pulled_bytes -from endpoints.v2.blob import BLOB_DIGEST_ROUTE -from image.appc import AppCImageFormatter -from image.shared import ManifestException -from image.docker.squashed import SquashedDockerImageFormatter -from storage import Storage -from util.audit import track_and_log, wrap_repository -from util.http import exact_abort -from util.metrics.prometheus import timed_blueprint -from util.registry.filelike import wrap_with_handler -from util.registry.queuefile import QueueFile -from util.registry.queueprocess import QueueProcess -from util.registry.tarlayerformat import TarLayerFormatterReporter - - -logger = 
logging.getLogger(__name__) -verbs = timed_blueprint(Blueprint("verbs", __name__)) - - -verb_stream_passes = Counter( - "quay_verb_stream_passes_total", - "number of passes over a tar stream used by verb requests", - labelnames=["kind"], -) - - -LAYER_MIMETYPE = "binary/octet-stream" -QUEUE_FILE_TIMEOUT = 15 # seconds - - -class VerbReporter(TarLayerFormatterReporter): - def __init__(self, kind): - self.kind = kind - - def report_pass(self, pass_count): - if pass_count: - verb_stream_passes.labels(self.kind).inc(pass_count) - - -def _open_stream(formatter, tag, schema1_manifest, derived_image_id, handlers, reporter): - """ - This method generates a stream of data which will be replicated and read from the queue files. - - This method runs in a separate process. - """ - # For performance reasons, we load the full image list here, cache it, then disconnect from - # the database. - with database.UseThenDisconnect(app.config): - layers = registry_model.list_parsed_manifest_layers( - tag.repository, schema1_manifest, storage, include_placements=True - ) - - def image_stream_getter(store, blob): - def get_stream_for_storage(): - current_image_stream = store.stream_read_file(blob.placements, blob.storage_path) - logger.debug("Returning blob %s: %s", blob.digest, blob.storage_path) - return current_image_stream - - return get_stream_for_storage - - def tar_stream_getter_iterator(): - # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3) - store = Storage(app, config_provider=config_provider, ip_resolver=ip_resolver) - - # Note: We reverse because we have to start at the leaf layer and move upward, - # as per the spec for the formatters. - for layer in reversed(layers): - yield image_stream_getter(store, layer.blob) - - stream = formatter.build_stream( - tag, - schema1_manifest, - derived_image_id, - layers, - tar_stream_getter_iterator, - reporter=reporter, - ) - - for handler_fn in handlers: - stream = wrap_with_handler(stream, handler_fn) - - return stream.read - - -def _sign_derived_image(verb, derived_image, queue_file): - """ - Read from the queue file and sign the contents which are generated. - - This method runs in a separate process. - """ - signature = None - try: - signature = signer.detached_sign(queue_file) - except Exception as e: - logger.exception( - "Exception when signing %s deriving image %s: $s", verb, derived_image, str(e) - ) - return - - # Setup the database (since this is a new process) and then disconnect immediately - # once the operation completes. - if not queue_file.raised_exception: - with database.UseThenDisconnect(app.config): - registry_model.set_derived_image_signature(derived_image, signer.name, signature) - - -def _write_derived_image_to_storage( - verb, derived_image, queue_file, namespace, repository, tag_name -): - """ - Read from the generated stream and write it back to the storage engine. - - This method runs in a separate process. - """ - - def handle_exception(ex): - logger.debug( - "Exception when building %s derived image %s (%s/%s:%s): %s", - verb, - derived_image, - namespace, - repository, - tag_name, - ex, - ) - - with database.UseThenDisconnect(app.config): - registry_model.delete_derived_image(derived_image) - - queue_file.add_exception_handler(handle_exception) - - # Re-Initialize the storage engine because some may not respond well to forking (e.g. 
S3) - store = Storage(app, config_provider=config_provider, ip_resolver=ip_resolver) - - try: - store.stream_write( - derived_image.blob.placements, derived_image.blob.storage_path, queue_file - ) - except IOError as ex: - logger.error( - "Exception when writing %s derived image %s (%s/%s:%s): %s", - verb, - derived_image, - namespace, - repository, - tag_name, - ex, - ) - - with database.UseThenDisconnect(app.config): - registry_model.delete_derived_image(derived_image) - - queue_file.close() - - -def _verify_repo_verb(_, namespace, repo_name, tag_name, verb, checker=None): - permission = ReadRepositoryPermission(namespace, repo_name) - repo = model.repository.get_repository(namespace, repo_name) - repo_is_public = repo is not None and model.repository.is_repository_public(repo) - if not permission.can() and not repo_is_public: - logger.debug( - "No permission to read repository %s/%s for user %s with verb %s", - namespace, - repo_name, - get_authenticated_user(), - verb, - ) - abort(403) - - if repo is not None and repo.kind.name != "image": - logger.debug( - "Repository %s/%s for user %s is not an image repo", - namespace, - repo_name, - get_authenticated_user(), - ) - abort(405) - - # Make sure the repo's namespace isn't disabled. - if not registry_model.is_namespace_enabled(namespace): - abort(400) - - # Lookup the requested tag. - repo_ref = registry_model.lookup_repository(namespace, repo_name) - if repo_ref is None: - abort(404) - - tag = registry_model.get_repo_tag(repo_ref, tag_name) - if tag is None: - logger.debug( - "Tag %s does not exist in repository %s/%s for user %s", - tag, - namespace, - repo_name, - get_authenticated_user(), - ) - abort(404) - - # Get its associated manifest. - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) - if manifest is None: - logger.debug("Could not get manifest on %s/%s:%s::%s", namespace, repo_name, tag.name, verb) - abort(404) - - # Retrieve the schema1-compatible version of the manifest. - try: - schema1_manifest = registry_model.get_schema1_parsed_manifest( - manifest, namespace, repo_name, tag.name, storage - ) - except ManifestException: - logger.exception( - "Could not get manifest on %s/%s:%s::%s", namespace, repo_name, tag.name, verb - ) - abort(400) - - if schema1_manifest is None: - abort(404) - - # If there is a data checker, call it first. - if checker is not None: - if not checker(tag, schema1_manifest): - logger.debug( - "Check mismatch on %s/%s:%s, verb %s", namespace, repo_name, tag.name, verb - ) - abort(404) - - # Preload the tag's repository information, so it gets cached. - assert tag.repository.namespace_name - assert tag.repository.name - - return tag, manifest, schema1_manifest - - -def _repo_verb_signature(namespace, repository, tag_name, verb, checker=None, **kwargs): - # Verify that the tag exists and that we have access to it. - tag, manifest, _ = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker) - - # Find the derived image storage for the verb. - derived_image = registry_model.lookup_derived_image( - manifest, verb, storage, varying_metadata={"tag": tag.name} - ) - - if derived_image is None or derived_image.blob.uploading: - return make_response("", 202) - - # Check if we have a valid signer configured. - if not signer.name: - abort(404) - - # Lookup the signature for the verb. - signature_value = registry_model.get_derived_image_signature(derived_image, signer.name) - if signature_value is None: - abort(404) - - # Return the signature. 
- return make_response(signature_value) - - -class SimpleHasher(object): - def __init__(self): - self._current_offset = 0 - - def update(self, buf): - self._current_offset += len(buf) - - @property - def hashed_bytes(self): - return self._current_offset - - -@check_region_blacklisted() -def _repo_verb( - namespace, repository, tag_name, verb, formatter, sign=False, checker=None, **kwargs -): - # Verify that the image exists and that we have access to it. - logger.debug( - "Verifying repo verb %s for repository %s/%s with user %s with mimetype %s", - verb, - namespace, - repository, - get_authenticated_user(), - request.accept_mimetypes.best, - ) - tag, manifest, schema1_manifest = _verify_repo_verb( - storage, namespace, repository, tag_name, verb, checker - ) - - # Load the repository for later. - repo = model.repository.get_repository(namespace, repository) - if repo is None: - abort(404) - - # Check for torrent, which is no longer supported. - if request.accept_mimetypes.best == "application/x-bittorrent": - abort(406) - - # Log the action. - track_and_log("repo_verb", wrap_repository(repo), tag=tag.name, verb=verb, **kwargs) - - is_readonly = app.config.get("REGISTRY_STATE", "normal") == "readonly" - - # Lookup/create the derived image for the verb and repo image. - if is_readonly: - derived_image = registry_model.lookup_derived_image( - manifest, verb, storage, varying_metadata={"tag": tag.name}, include_placements=True - ) - else: - derived_image = registry_model.lookup_or_create_derived_image( - manifest, - verb, - storage.preferred_locations[0], - storage, - varying_metadata={"tag": tag.name}, - include_placements=True, - ) - if derived_image is None: - logger.error("Could not create or lookup a derived image for manifest %s", manifest) - abort(400) - - if derived_image is not None and not derived_image.blob.uploading: - logger.debug("Derived %s image %s exists in storage", verb, derived_image) - is_head_request = request.method == "HEAD" - - if derived_image.blob.compressed_size: - image_pulled_bytes.labels("verbs").inc(derived_image.blob.compressed_size) - - download_url = storage.get_direct_download_url( - derived_image.blob.placements, derived_image.blob.storage_path, head=is_head_request - ) - if download_url: - logger.debug("Redirecting to download URL for derived %s image %s", verb, derived_image) - return redirect(download_url) - - # Close the database handle here for this process before we send the long download. - database.close_db_filter(None) - - logger.debug("Sending cached derived %s image %s", verb, derived_image) - return send_file( - storage.stream_read_file( - derived_image.blob.placements, derived_image.blob.storage_path - ), - mimetype=LAYER_MIMETYPE, - ) - - logger.debug("Building and returning derived %s image", verb) - hasher = SimpleHasher() - - # Close the database connection before any process forking occurs. This is important because - # the Postgres driver does not react kindly to forking, so we need to make sure it is closed - # so that each process will get its own unique connection. - database.close_db_filter(None) - - def _cleanup(): - # Close any existing DB connection once the process has exited. - database.close_db_filter(None) - - def _store_metadata_and_cleanup(): - if is_readonly: - return - - with database.UseThenDisconnect(app.config): - registry_model.set_derived_image_size(derived_image, hasher.hashed_bytes) - - # Create a queue process to generate the data. 
The queue files will read from the process - # and send the results to the client and storage. - unique_id = ( - derived_image.unique_id - if derived_image is not None - else hashlib.sha256(("%s:%s" % (verb, uuid.uuid4())).encode("utf-8")).hexdigest() - ) - handlers = [hasher.update] - reporter = VerbReporter(verb) - args = (formatter, tag, schema1_manifest, unique_id, handlers, reporter) - queue_process = QueueProcess( - _open_stream, - 8 * 1024, - 10 * 1024 * 1024, # 8K/10M chunk/max - args, - finished=_store_metadata_and_cleanup, - ) - - client_queue_file = QueueFile( - queue_process.create_queue(), "client", timeout=QUEUE_FILE_TIMEOUT - ) - - if not is_readonly: - storage_queue_file = QueueFile( - queue_process.create_queue(), "storage", timeout=QUEUE_FILE_TIMEOUT - ) - - # If signing is required, add a QueueFile for signing the image as we stream it out. - signing_queue_file = None - if sign and signer.name: - signing_queue_file = QueueFile( - queue_process.create_queue(), "signing", timeout=QUEUE_FILE_TIMEOUT - ) - - # Start building. - queue_process.run() - - # Start the storage saving. - if not is_readonly: - storage_args = (verb, derived_image, storage_queue_file, namespace, repository, tag_name) - QueueProcess.run_process(_write_derived_image_to_storage, storage_args, finished=_cleanup) - - if sign and signer.name: - signing_args = (verb, derived_image, signing_queue_file) - QueueProcess.run_process(_sign_derived_image, signing_args, finished=_cleanup) - - # Close the database handle here for this process before we send the long download. - database.close_db_filter(None) - - # Return the client's data. - return send_file(client_queue_file, mimetype=LAYER_MIMETYPE) - - -def os_arch_checker(os, arch): - def checker(tag, manifest): - try: - image_json = json.loads(manifest.leaf_layer.raw_v1_metadata) - except ValueError: - logger.exception("Could not parse leaf layer JSON for manifest %s", manifest) - return False - except TypeError: - logger.exception("Could not parse leaf layer JSON for manifest %s", manifest) - return False - - # Verify the architecture and os. - operating_system = image_json.get("os", "linux") - if operating_system != os: - return False - - architecture = image_json.get("architecture", "amd64") - - # Note: Some older Docker images have 'x86_64' rather than 'amd64'. - # We allow the conversion here. - if architecture == "x86_64" and operating_system == "linux": - architecture = "amd64" - - if architecture != arch: - return False - - return True - - return checker - - -def observe_route(protocol): - """ - Decorates verb endpoints to record the image_pulls metric into Prometheus. 
- """ - - def decorator(func): - @wraps(func) - def wrapper(*args, **kwargs): - rv = func(*args, **kwargs) - image_pulls.labels(protocol, "tag", rv.status_code) - return rv - - return wrapper - - return decorator - - -@route_show_if(features.ACI_CONVERSION) -@anon_protect -@verbs.route("/aci/////sig///", methods=["GET"]) -@verbs.route("/aci/////aci.asc///", methods=["GET"]) -@observe_route("aci") -@process_auth -def get_aci_signature(server, namespace, repository, tag, os, arch): - return _repo_verb_signature( - namespace, repository, tag, "aci", checker=os_arch_checker(os, arch), os=os, arch=arch - ) - - -@route_show_if(features.ACI_CONVERSION) -@anon_protect -@verbs.route( - "/aci/////aci///", methods=["GET", "HEAD"] -) -@observe_route("aci") -@process_auth -def get_aci_image(server, namespace, repository, tag, os, arch): - return _repo_verb( - namespace, - repository, - tag, - "aci", - AppCImageFormatter(), - sign=True, - checker=os_arch_checker(os, arch), - os=os, - arch=arch, - ) - - -@anon_protect -@verbs.route("/squash///", methods=["GET"]) -@observe_route("squash") -@process_auth -def get_squashed_tag(namespace, repository, tag): - return _repo_verb(namespace, repository, tag, "squash", SquashedDockerImageFormatter()) - - -@verbs.route("/_internal_ping") -@anon_allowed -def internal_ping(): - return make_response("true", 200) diff --git a/endpoints/verbs/test/test_security.py b/endpoints/verbs/test/test_security.py deleted file mode 100644 index 5ed065b1d..000000000 --- a/endpoints/verbs/test/test_security.py +++ /dev/null @@ -1,97 +0,0 @@ -import pytest - -from flask import url_for -from endpoints.test.shared import conduct_call, gen_basic_auth -from test.fixtures import * - -NO_ACCESS_USER = "freshuser" -READ_ACCESS_USER = "reader" -ADMIN_ACCESS_USER = "devtable" -CREATOR_ACCESS_USER = "creator" - -PUBLIC_REPO = "public/publicrepo" -PRIVATE_REPO = "devtable/shared" -ORG_REPO = "buynlarge/orgrepo" -ANOTHER_ORG_REPO = "buynlarge/anotherorgrepo" - -ACI_ARGS = { - "server": "someserver", - "tag": "fake", - "os": "linux", - "arch": "x64", -} - - -@pytest.mark.parametrize( - "user", - [ - (0, None), - (1, NO_ACCESS_USER), - (2, READ_ACCESS_USER), - (3, CREATOR_ACCESS_USER), - (4, ADMIN_ACCESS_USER), - ], -) -@pytest.mark.parametrize( - "endpoint,method,repository,single_repo_path,params,expected_statuses", - [ - ("get_aci_signature", "GET", PUBLIC_REPO, False, ACI_ARGS, (404, 404, 404, 404, 404)), - ("get_aci_signature", "GET", PRIVATE_REPO, False, ACI_ARGS, (403, 403, 404, 403, 404)), - ("get_aci_signature", "GET", ORG_REPO, False, ACI_ARGS, (403, 403, 404, 403, 404)), - ("get_aci_signature", "GET", ANOTHER_ORG_REPO, False, ACI_ARGS, (403, 403, 403, 403, 404)), - # get_aci_image - ("get_aci_image", "GET", PUBLIC_REPO, False, ACI_ARGS, (404, 404, 404, 404, 404)), - ("get_aci_image", "GET", PRIVATE_REPO, False, ACI_ARGS, (403, 403, 404, 403, 404)), - ("get_aci_image", "GET", ORG_REPO, False, ACI_ARGS, (403, 403, 404, 403, 404)), - ("get_aci_image", "GET", ANOTHER_ORG_REPO, False, ACI_ARGS, (403, 403, 403, 403, 404)), - # get_squashed_tag - ( - "get_squashed_tag", - "GET", - PUBLIC_REPO, - False, - dict(tag="fake"), - (404, 404, 404, 404, 404), - ), - ( - "get_squashed_tag", - "GET", - PRIVATE_REPO, - False, - dict(tag="fake"), - (403, 403, 404, 403, 404), - ), - ("get_squashed_tag", "GET", ORG_REPO, False, dict(tag="fake"), (403, 403, 404, 403, 404)), - ( - "get_squashed_tag", - "GET", - ANOTHER_ORG_REPO, - False, - dict(tag="fake"), - (403, 403, 403, 403, 404), - ), - ], -) -def 
test_verbs_security( - user, endpoint, method, repository, single_repo_path, params, expected_statuses, app, client -): - headers = {} - if user[1] is not None: - headers["Authorization"] = gen_basic_auth(user[1], "password") - - if single_repo_path: - params["repository"] = repository - else: - (namespace, repo_name) = repository.split("/") - params["namespace"] = namespace - params["repository"] = repo_name - - conduct_call( - client, - "verbs." + endpoint, - url_for, - method, - params, - expected_code=expected_statuses[user[0]], - headers=headers, - ) diff --git a/endpoints/web.py b/endpoints/web.py index 48072b436..de0738161 100644 --- a/endpoints/web.py +++ b/endpoints/web.py @@ -27,7 +27,6 @@ from app import ( billing as stripe, build_logs, avatar, - signer, log_archive, config_provider, get_app_url, @@ -144,17 +143,6 @@ def user_view(path): return index("") -@route_show_if(features.ACI_CONVERSION) -@web.route("/aci-signing-key") -@no_cache -@anon_protect -def aci_signing_key(): - if not signer.name: - abort(404) - - return send_file(signer.open_public_key_file(), mimetype=PGP_KEY_MIMETYPE) - - @web.route("/plans/") @no_cache @route_show_if(features.BILLING) diff --git a/health/services.py b/health/services.py index 7d831a3c4..0d4c5ea68 100644 --- a/health/services.py +++ b/health/services.py @@ -178,7 +178,6 @@ def _check_disk_space(for_warning): _INSTANCE_SERVICES = { "registry_gunicorn": _check_gunicorn("v1/_internal_ping"), "web_gunicorn": _check_gunicorn("_internal_ping"), - "verbs_gunicorn": _check_gunicorn("c1/_internal_ping"), "service_key": _check_service_key, "disk_space": _check_disk_space(for_warning=False), "jwtproxy": _check_jwt_proxy, diff --git a/image/appc/__init__.py b/image/appc/__init__.py deleted file mode 100644 index 60c74a415..000000000 --- a/image/appc/__init__.py +++ /dev/null @@ -1,227 +0,0 @@ -import json -import re -import calendar - -from uuid import uuid4 - -from app import app -from util.registry.streamlayerformat import StreamLayerMerger -from util.dict_wrappers import JSONPathDict -from image.common import TarImageFormatter - - -ACNAME_REGEX = re.compile(r"[^a-z-]+") - - -class AppCImageFormatter(TarImageFormatter): - """ - Image formatter which produces an tarball according to the AppC specification. - """ - - def stream_generator( - self, - tag, - parsed_manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=None, - ): - image_mtime = 0 - created = parsed_manifest.created_datetime - if created is not None: - image_mtime = calendar.timegm(created.utctimetuple()) - - # ACI Format (.tar): - # manifest - The JSON manifest - # rootfs - The root file system - - # Yield the manifest. - aci_manifest = json.dumps( - DockerV1ToACIManifestTranslator.build_manifest(tag, parsed_manifest, synthetic_image_id) - ) - yield self.tar_file("manifest", aci_manifest.encode("utf-8"), mtime=image_mtime) - - # Yield the merged layer dtaa. - yield self.tar_folder("rootfs", mtime=image_mtime) - - layer_merger = StreamLayerMerger( - tar_stream_getter_iterator, path_prefix="rootfs/", reporter=reporter - ) - for entry in layer_merger.get_generator(): - yield entry - - -class DockerV1ToACIManifestTranslator(object): - @staticmethod - def _build_isolators(docker_config): - """ - Builds ACI isolator config from the docker config. 
- """ - - def _isolate_memory(memory): - return {"name": "memory/limit", "value": {"request": str(memory) + "B",}} - - def _isolate_swap(memory): - return {"name": "memory/swap", "value": {"request": str(memory) + "B",}} - - def _isolate_cpu(cpu): - return {"name": "cpu/shares", "value": {"request": str(cpu),}} - - def _isolate_capabilities(capabilities_set_value): - capabilities_set = re.split(r"[\s,]", capabilities_set_value) - return {"name": "os/linux/capabilities-retain-set", "value": {"set": capabilities_set,}} - - mappers = { - "Memory": _isolate_memory, - "MemorySwap": _isolate_swap, - "CpuShares": _isolate_cpu, - "Cpuset": _isolate_capabilities, - } - - isolators = [] - - for config_key in mappers: - value = docker_config.get(config_key) - if value: - isolators.append(mappers[config_key](value)) - - return isolators - - @staticmethod - def _build_ports(docker_config): - """ - Builds the ports definitions for the ACI. - - Formats: - port/tcp - port/udp - port - """ - ports = [] - - exposed_ports = docker_config["ExposedPorts"] - if exposed_ports is not None: - port_list = list(exposed_ports.keys()) - else: - port_list = docker_config["Ports"] or docker_config["ports"] or [] - - for docker_port in port_list: - protocol = "tcp" - port_number = -1 - - if "/" in docker_port: - (port_number, protocol) = docker_port.split("/") - else: - port_number = docker_port - - try: - port_number = int(port_number) - ports.append( - {"name": "port-%s" % port_number, "port": port_number, "protocol": protocol,} - ) - except ValueError: - pass - - return ports - - @staticmethod - def _ac_name(value): - sanitized = ACNAME_REGEX.sub("-", value.lower()).strip("-") - if sanitized == "": - return str(uuid4()) - return sanitized - - @staticmethod - def _build_volumes(docker_config): - """ - Builds the volumes definitions for the ACI. - """ - volumes = [] - - def get_name(docker_volume_path): - volume_name = DockerV1ToACIManifestTranslator._ac_name(docker_volume_path) - return "volume-%s" % volume_name - - volume_list = docker_config["Volumes"] or docker_config["volumes"] or {} - for docker_volume_path in volume_list.keys(): - if not docker_volume_path: - continue - - volumes.append( - { - "name": get_name(docker_volume_path), - "path": docker_volume_path, - "readOnly": False, - } - ) - return volumes - - @staticmethod - def build_manifest(tag, manifest, synthetic_image_id): - """ - Builds an ACI manifest of an existing repository image. - """ - docker_layer_data = JSONPathDict(json.loads(manifest.leaf_layer.raw_v1_metadata)) - config = docker_layer_data["config"] or JSONPathDict({}) - - namespace = tag.repository.namespace_name - repo_name = tag.repository.name - source_url = "%s://%s/%s/%s:%s" % ( - app.config["PREFERRED_URL_SCHEME"], - app.config["SERVER_HOSTNAME"], - namespace, - repo_name, - tag.name, - ) - - # ACI requires that the execution command be absolutely referenced. Therefore, if we find - # a relative command, we give it as an argument to /bin/sh to resolve and execute for us. - entrypoint = config["Entrypoint"] or [] - exec_path = entrypoint + (config["Cmd"] or []) - if exec_path and not exec_path[0].startswith("/"): - exec_path = ["/bin/sh", "-c", '""%s""' % " ".join(exec_path)] - - # TODO: ACI doesn't support : in the name, so remove any ports. - hostname = app.config["SERVER_HOSTNAME"] - hostname = hostname.split(":", 1)[0] - - # Calculate the environment variables. 
- docker_env_vars = config.get("Env") or [] - env_vars = [] - for var in docker_env_vars: - pieces = var.split("=") - if len(pieces) != 2: - continue - - env_vars.append(pieces) - - manifest = { - "acKind": "ImageManifest", - "acVersion": "0.6.1", - "name": "%s/%s/%s" % (hostname.lower(), namespace.lower(), repo_name.lower()), - "labels": [ - {"name": "version", "value": tag.name,}, - {"name": "arch", "value": docker_layer_data.get("architecture") or "amd64"}, - {"name": "os", "value": docker_layer_data.get("os") or "linux"}, - ], - "app": { - "exec": exec_path, - # Below, `or 'root'` is required to replace empty string from Dockerfiles. - "user": config.get("User") or "root", - "group": config.get("Group") or "root", - "eventHandlers": [], - "workingDirectory": config.get("WorkingDir") or "/", - "environment": [{"name": key, "value": value} for (key, value) in env_vars], - "isolators": DockerV1ToACIManifestTranslator._build_isolators(config), - "mountPoints": DockerV1ToACIManifestTranslator._build_volumes(config), - "ports": DockerV1ToACIManifestTranslator._build_ports(config), - "annotations": [ - {"name": "created", "value": docker_layer_data.get("created") or ""}, - {"name": "homepage", "value": source_url}, - {"name": "quay.io/derived-image", "value": synthetic_image_id}, - ], - }, - } - - return manifest diff --git a/image/appc/test/test_appc.py b/image/appc/test/test_appc.py deleted file mode 100644 index a068a0c4a..000000000 --- a/image/appc/test/test_appc.py +++ /dev/null @@ -1,74 +0,0 @@ -import pytest - -from image.appc import DockerV1ToACIManifestTranslator -from util.dict_wrappers import JSONPathDict - - -EXAMPLE_MANIFEST_OBJ = { - "architecture": "amd64", - "config": { - "Hostname": "1d811a9194c4", - "Domainname": "", - "User": "", - "AttachStdin": False, - "AttachStdout": False, - "AttachStderr": False, - "ExposedPorts": {"2379/tcp": {}, "2380/tcp": {}}, - "Tty": False, - "OpenStdin": False, - "StdinOnce": False, - "Env": ["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"], - "Cmd": ["/usr/local/bin/etcd"], - "ArgsEscaped": True, - "Image": "sha256:4c86d1f362d42420c137846fae31667ee85ce6f2cab406cdff26a8ff8a2c31c4", - "Volumes": None, - "WorkingDir": "", - "Entrypoint": None, - "OnBuild": [], - "Labels": {}, - }, - "container": "5a3565ce9b808a0eb0bcbc966dad624f76ad308ad24e11525b5da1201a1df135", - "container_config": { - "Hostname": "1d811a9194c4", - "Domainname": "", - "User": "", - "AttachStdin": False, - "AttachStdout": False, - "AttachStderr": False, - "ExposedPorts": {"2379/tcp": {}, "2380/tcp": {}}, - "Tty": False, - "OpenStdin": False, - "StdinOnce": False, - "Env": ["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"], - "Cmd": ["/bin/sh", "-c", '#(nop) CMD ["/usr/local/bin/etcd"]'], - "ArgsEscaped": True, - "Image": "sha256:4c86d1f362d42420c137846fae31667ee85ce6f2cab406cdff26a8ff8a2c31c4", - "Volumes": None, - "WorkingDir": "", - "Entrypoint": None, - "OnBuild": [], - "Labels": {}, - }, - "created": "2016-11-11T19:03:55.137387628Z", - "docker_version": "1.11.1", - "id": "3314a3781a526fe728e2e96cfcfb3cc0de901b5c102e6204e8b0155c8f7d5fd2", - "os": "linux", - "parent": "625342ec4d0f3d7a96fd3bb1ef0b4b0b6bc65ebb3d252fd33af0691f7984440e", - "throwaway": True, -} - - -@pytest.mark.parametrize( - "vcfg,expected", - [ - ({"Volumes": None}, []), - ({"Volumes": {}}, []), - ({"Volumes": {"/bin": {}}}, [{"name": "volume-bin", "path": "/bin", "readOnly": False}]), - ({"volumes": None}, []), - ({"volumes": {}}, []), - ({"volumes": {"/bin": {}}}, 
[{"name": "volume-bin", "path": "/bin", "readOnly": False}]), - ], -) -def test_volume_version_easy(vcfg, expected): - output = DockerV1ToACIManifestTranslator._build_volumes(JSONPathDict(vcfg)) - assert output == expected diff --git a/image/common.py b/image/common.py deleted file mode 100644 index 7efd9731f..000000000 --- a/image/common.py +++ /dev/null @@ -1,89 +0,0 @@ -import tarfile -from util.registry.gzipwrap import GzipWrap - - -class TarImageFormatter(object): - """ - Base class for classes which produce a tar containing image and layer data. - """ - - def build_stream( - self, - tag, - manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=None, - ): - """ - Builds and streams a synthetic .tar.gz that represents the formatted tar created by this - class's implementation. - """ - return GzipWrap( - self.stream_generator( - tag, - manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=reporter, - ) - ) - - def stream_generator( - self, - tag, - manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=None, - ): - raise NotImplementedError - - def tar_file(self, name, contents, mtime=None): - """ - Returns the tar binary representation for a file with the given name and file contents. - """ - assert isinstance(contents, bytes) - length = len(contents) - tar_data = self.tar_file_header(name, length, mtime=mtime) - tar_data += contents - tar_data += self.tar_file_padding(length) - return tar_data - - def tar_file_padding(self, length): - """ - Returns tar file padding for file data of the given length. - """ - if length % 512 != 0: - return b"\0" * (512 - (length % 512)) - - return b"" - - def tar_file_header(self, name, file_size, mtime=None): - """ - Returns tar file header data for a file with the given name and size. - """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.REGTYPE - info.size = file_size - - if mtime is not None: - info.mtime = mtime - return info.tobuf() - - def tar_folder(self, name, mtime=None): - """ - Returns tar file header data for a folder with the given name. - """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.DIRTYPE - - if mtime is not None: - info.mtime = mtime - - # allow the directory to be readable by non-root users - info.mode = 0o755 - return info.tobuf() diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 3d1bde8ec..7e541f198 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -220,7 +220,17 @@ class DockerSchema1Manifest(ManifestInterface): Raises a ManifestException on failure. """ - # Already validated. + # Validate the parent image IDs. 
+ encountered_ids = set() + for layer in self.layers: + if layer.v1_metadata.parent_image_id: + if layer.v1_metadata.parent_image_id not in encountered_ids: + raise ManifestException( + "Unknown parent image %s" % layer.v1_metadata.parent_image_id + ) + + if layer.v1_metadata.image_id: + encountered_ids.add(layer.v1_metadata.image_id) @property def is_signed(self): @@ -283,6 +293,10 @@ class DockerSchema1Manifest(ManifestInterface): @property def layers_compressed_size(self): + return sum(l.compressed_size for l in self.layers if l.compressed_size is not None) + + @property + def config_media_type(self): return None @property diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py index b2bfbe757..0d29a8229 100644 --- a/image/docker/schema2/list.py +++ b/image/docker/schema2/list.py @@ -6,7 +6,7 @@ from jsonschema import validate as validate_schema, ValidationError from digest import digest_tools from image.shared import ManifestException -from image.shared.interfaces import ManifestInterface +from image.shared.interfaces import ManifestListInterface from image.shared.schemautil import LazyManifestLoader from image.docker.schema1 import DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE from image.docker.schema1 import DockerSchema1Manifest @@ -53,7 +53,7 @@ class MismatchManifestException(MalformedSchema2ManifestList): pass -class DockerSchema2ManifestList(ManifestInterface): +class DockerSchema2ManifestList(ManifestListInterface): METASCHEMA = { "type": "object", "properties": { @@ -228,6 +228,10 @@ class DockerSchema2ManifestList(ManifestInterface): def layers_compressed_size(self): return None + @property + def config_media_type(self): + return None + @lru_cache(maxsize=1) def manifests(self, content_retriever): """ @@ -249,6 +253,20 @@ class DockerSchema2ManifestList(ManifestInterface): for m in manifests ] + @property + def amd64_linux_manifest_digest(self): + """ Returns the digest of the AMD64+Linux manifest in this list, if any, or None + if none. + """ + for manifest_ref in self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY]: + platform = manifest_ref[DOCKER_SCHEMA2_MANIFESTLIST_PLATFORM_KEY] + architecture = platform[DOCKER_SCHEMA2_MANIFESTLIST_ARCHITECTURE_KEY] + os = platform[DOCKER_SCHEMA2_MANIFESTLIST_OS_KEY] + if architecture == "amd64" and os == "linux": + return manifest_ref[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY] + + return None + def validate(self, content_retriever): """ Performs validation of required assertions about the manifest. diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index 8716851a4..6a244a56f 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -172,7 +172,7 @@ class DockerSchema2Manifest(ManifestInterface): Raises a ManifestException on failure. """ - # Nothing to validate. 
+ self._get_built_config(content_retriever) @property def is_manifest_list(self): @@ -222,6 +222,12 @@ class DockerSchema2Manifest(ManifestInterface): def layers_compressed_size(self): return sum(layer.compressed_size for layer in self.filesystem_layers) + @property + def config_media_type(self): + return self._parsed[DOCKER_SCHEMA2_MANIFEST_CONFIG_KEY][ + DOCKER_SCHEMA2_MANIFEST_MEDIATYPE_KEY + ] + @property def has_remote_layer(self): for layer in self.filesystem_layers: diff --git a/image/docker/schema2/test/test_list.py b/image/docker/schema2/test/test_list.py index 568a67521..88e70fcd3 100644 --- a/image/docker/schema2/test/test_list.py +++ b/image/docker/schema2/test/test_list.py @@ -50,7 +50,7 @@ MANIFESTLIST_BYTES = json.dumps( }, { "mediaType": "application/vnd.docker.distribution.manifest.v1+json", - "size": 878, + "size": 1051, "digest": "sha256:5b", "platform": {"architecture": "amd64", "os": "linux", "features": ["sse4"]}, }, @@ -84,6 +84,8 @@ def test_valid_manifestlist(): assert manifestlist.bytes.as_encoded_str() == MANIFESTLIST_BYTES assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES) assert manifestlist.get_layers(retriever) is None + assert manifestlist.config_media_type is None + assert manifestlist.layers_compressed_size is None assert not manifestlist.blob_digests for index, manifest in enumerate(manifestlist.manifests(retriever)): @@ -114,6 +116,8 @@ def test_valid_manifestlist(): # Ensure it validates. manifestlist.validate(retriever) + assert manifestlist.amd64_linux_manifest_digest == "sha256:5b" + def test_get_schema1_manifest_no_matching_list(): manifestlist = DockerSchema2ManifestList(Bytes.for_string_or_unicode(NO_AMD_MANIFESTLIST_BYTES)) @@ -121,6 +125,7 @@ def test_get_schema1_manifest_no_matching_list(): assert manifestlist.media_type == "application/vnd.docker.distribution.manifest.list.v2+json" assert manifestlist.bytes.as_encoded_str() == NO_AMD_MANIFESTLIST_BYTES + assert manifestlist.amd64_linux_manifest_digest is None compatible_manifest = manifestlist.get_schema1_manifest("foo", "bar", "baz", retriever) assert compatible_manifest is None @@ -130,10 +135,22 @@ def test_builder(): existing = DockerSchema2ManifestList(Bytes.for_string_or_unicode(MANIFESTLIST_BYTES)) builder = DockerSchema2ManifestListBuilder() for index, manifest in enumerate(existing.manifests(retriever)): - builder.add_manifest(manifest.manifest_obj, "amd64", "os") + builder.add_manifest(manifest.manifest_obj, "amd64", "linux") built = builder.build() assert len(built.manifests(retriever)) == 2 + assert built.amd64_linux_manifest_digest is not None + + +def test_builder_no_amd(): + existing = DockerSchema2ManifestList(Bytes.for_string_or_unicode(MANIFESTLIST_BYTES)) + builder = DockerSchema2ManifestListBuilder() + for index, manifest in enumerate(existing.manifests(retriever)): + builder.add_manifest(manifest.manifest_obj, "intel386", "os") + + built = builder.build() + assert len(built.manifests(retriever)) == 2 + assert built.amd64_linux_manifest_digest is None def test_invalid_manifestlist(): diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index ca1d4e7f7..c5a50ffe2 100644 --- a/image/docker/schema2/test/test_manifest.py +++ b/image/docker/schema2/test/test_manifest.py @@ -119,6 +119,8 @@ def test_valid_manifest(): assert manifest.media_type == "application/vnd.docker.distribution.manifest.v2+json" assert not manifest.has_remote_layer assert manifest.has_legacy_image + assert manifest.config_media_type == 
"application/vnd.docker.container.image.v1+json" + assert manifest.layers_compressed_size == 123721 retriever = ContentRetrieverForTesting.for_config( { @@ -171,6 +173,8 @@ def test_valid_remote_manifest(): ) assert manifest.media_type == "application/vnd.docker.distribution.manifest.v2+json" assert manifest.has_remote_layer + assert manifest.config_media_type == "application/vnd.docker.container.image.v1+json" + assert manifest.layers_compressed_size == 123721 assert len(manifest.filesystem_layers) == 4 assert manifest.filesystem_layers[0].compressed_size == 1234 diff --git a/image/docker/squashed.py b/image/docker/squashed.py deleted file mode 100644 index f4927f378..000000000 --- a/image/docker/squashed.py +++ /dev/null @@ -1,149 +0,0 @@ -import copy -import json -import math -import calendar - -from app import app -from image.common import TarImageFormatter -from util.registry.gzipwrap import GZIP_BUFFER_SIZE -from util.registry.streamlayerformat import StreamLayerMerger - - -class FileEstimationException(Exception): - """ - Exception raised by build_docker_load_stream if the estimated size of the layer tar was lower - than the actual size. - - This means the sent tar header is wrong, and we have to fail. - """ - - pass - - -class SquashedDockerImageFormatter(TarImageFormatter): - """ - Image formatter which produces a squashed image compatible with the `docker load` command. - """ - - # Multiplier against the image size reported by Docker to account for the tar metadata. - # Note: This multiplier was not formally calculated in anyway and should be adjusted overtime - # if/when we encounter issues with it. Unfortunately, we cannot make it too large or the Docker - # daemon dies when trying to load the entire tar into memory. - SIZE_MULTIPLIER = 1.2 - - def stream_generator( - self, - tag, - parsed_manifest, - synthetic_image_id, - layer_iterator, - tar_stream_getter_iterator, - reporter=None, - ): - image_mtime = 0 - created = parsed_manifest.created_datetime - if created is not None: - image_mtime = calendar.timegm(created.utctimetuple()) - - # Docker import V1 Format (.tar): - # repositories - JSON file containing a repo -> tag -> image map - # {image ID folder}: - # json - The layer JSON - # layer.tar - The tarballed contents of the layer - # VERSION - The docker import version: '1.0' - layer_merger = StreamLayerMerger(tar_stream_getter_iterator, reporter=reporter) - - # Yield the repositories file: - synthetic_layer_info = {} - synthetic_layer_info[tag.name + ".squash"] = synthetic_image_id - - hostname = app.config["SERVER_HOSTNAME"] - repositories = {} - namespace = tag.repository.namespace_name - repository = tag.repository.name - repositories[hostname + "/" + namespace + "/" + repository] = synthetic_layer_info - - yield self.tar_file( - "repositories", json.dumps(repositories).encode("utf-8"), mtime=image_mtime - ) - - # Yield the image ID folder. - yield self.tar_folder(synthetic_image_id, mtime=image_mtime) - - # Yield the JSON layer data. - layer_json = SquashedDockerImageFormatter._build_layer_json( - parsed_manifest, synthetic_image_id - ) - yield self.tar_file( - synthetic_image_id + "/json", json.dumps(layer_json).encode("utf-8"), mtime=image_mtime - ) - - # Yield the VERSION file. - yield self.tar_file(synthetic_image_id + "/VERSION", b"1.0", mtime=image_mtime) - - # Yield the merged layer data's header. 
- estimated_file_size = 0 - for layer in layer_iterator: - estimated_file_size += layer.estimated_size( - SquashedDockerImageFormatter.SIZE_MULTIPLIER - ) - - # Make sure the estimated file size is an integer number of bytes. - estimated_file_size = int(math.ceil(estimated_file_size)) - - yield self.tar_file_header( - synthetic_image_id + "/layer.tar", estimated_file_size, mtime=image_mtime - ) - - # Yield the contents of the merged layer. - yielded_size = 0 - for entry in layer_merger.get_generator(): - yield entry - yielded_size += len(entry) - - # If the yielded size is more than the estimated size (which is unlikely but possible), then - # raise an exception since the tar header will be wrong. - if yielded_size > estimated_file_size: - leaf_image_id = parsed_manifest.leaf_layer_v1_image_id - message = "For %s/%s:%s (%s:%s): Expected %s bytes, found %s bytes" % ( - namespace, - repository, - tag, - parsed_manifest.digest, - leaf_image_id, - estimated_file_size, - yielded_size, - ) - raise FileEstimationException(message) - - # If the yielded size is less than the estimated size (which is likely), fill the rest with - # zeros. - if yielded_size < estimated_file_size: - to_yield = estimated_file_size - yielded_size - while to_yield > 0: - yielded = min(to_yield, GZIP_BUFFER_SIZE) - yield b"\0" * yielded - to_yield -= yielded - - # Yield any file padding to 512 bytes that is necessary. - yield self.tar_file_padding(estimated_file_size) - - # Last two records are empty in tar spec. - yield b"\0" * 512 - yield b"\0" * 512 - - @staticmethod - def _build_layer_json(manifest, synthetic_image_id): - updated_json = json.loads(manifest.leaf_layer.raw_v1_metadata) - updated_json["id"] = synthetic_image_id - - if "parent" in updated_json: - del updated_json["parent"] - - if "config" in updated_json and "Image" in updated_json["config"]: - updated_json["config"]["Image"] = synthetic_image_id - - if "container_config" in updated_json and "Image" in updated_json["container_config"]: - updated_json["container_config"]["Image"] = synthetic_image_id - - return updated_json diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index b86270e97..c9e27a936 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -37,10 +37,12 @@ MANIFEST_BYTES = json.dumps( "tag": "latest", "architecture": "amd64", "fsLayers": [ + {"blobSum": "sha256:cd8567d70002e957612902a8e985ea129d831ebe04057d88fb644857caa45d11"}, {"blobSum": "sha256:cc8567d70002e957612902a8e985ea129d831ebe04057d88fb644857caa45d11"}, {"blobSum": "sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef"}, ], "history": [ + {"v1Compatibility": '{"id":"sizedid", "parent": "someid", "Size": 1234}'}, {"v1Compatibility": '{"id":"someid", "parent": "anotherid"}'}, {"v1Compatibility": '{"id":"anotherid"}'}, ], @@ -71,10 +73,12 @@ def test_valid_manifest(): assert manifest.namespace == "" assert manifest.repo_name == "hello-world" assert manifest.tag == "latest" - assert manifest.image_ids == {"someid", "anotherid"} - assert manifest.parent_image_ids == {"anotherid"} + assert manifest.image_ids == {"sizedid", "someid", "anotherid"} + assert manifest.parent_image_ids == {"someid", "anotherid"} + assert manifest.layers_compressed_size == 1234 + assert manifest.config_media_type is None - assert len(manifest.layers) == 2 + assert len(manifest.layers) == 3 assert manifest.layers[0].v1_metadata.image_id == "anotherid" assert manifest.layers[0].v1_metadata.parent_image_id is None @@ -82,10 
+86,14 @@ def test_valid_manifest(): assert manifest.layers[1].v1_metadata.image_id == "someid" assert manifest.layers[1].v1_metadata.parent_image_id == "anotherid" + assert manifest.layers[2].v1_metadata.image_id == "sizedid" + assert manifest.layers[2].v1_metadata.parent_image_id == "someid" + assert manifest.layers[0].compressed_size is None assert manifest.layers[1].compressed_size is None + assert manifest.layers[2].compressed_size == 1234 - assert manifest.leaf_layer == manifest.layers[1] + assert manifest.leaf_layer == manifest.layers[2] assert manifest.created_datetime is None unsigned = manifest.unsigned() @@ -97,8 +105,8 @@ def test_valid_manifest(): assert unsigned.digest != manifest.digest image_layers = list(manifest.get_layers(None)) - assert len(image_layers) == 2 - for index in range(0, 2): + assert len(image_layers) == 3 + for index in range(0, 3): assert image_layers[index].layer_id == manifest.layers[index].v1_metadata.image_id assert image_layers[index].blob_digest == manifest.layers[index].digest assert image_layers[index].command == manifest.layers[index].v1_metadata.command diff --git a/image/oci/index.py b/image/oci/index.py index fa6bd341a..1aff53f8b 100644 --- a/image/oci/index.py +++ b/image/oci/index.py @@ -41,7 +41,7 @@ from jsonschema import validate as validate_schema, ValidationError from digest import digest_tools from image.shared import ManifestException -from image.shared.interfaces import ManifestInterface +from image.shared.interfaces import ManifestListInterface from image.shared.schemautil import LazyManifestLoader from image.oci import OCI_IMAGE_INDEX_CONTENT_TYPE, OCI_IMAGE_MANIFEST_CONTENT_TYPE from image.oci.descriptor import get_descriptor_schema @@ -81,7 +81,7 @@ class MalformedIndex(ManifestException): pass -class OCIIndex(ManifestInterface): +class OCIIndex(ManifestListInterface): METASCHEMA = { "type": "object", "properties": { @@ -227,6 +227,10 @@ class OCIIndex(ManifestInterface): def layers_compressed_size(self): return None + @property + def config_media_type(self): + return None + @lru_cache(maxsize=1) def manifests(self, content_retriever): """ @@ -275,6 +279,20 @@ class OCIIndex(ManifestInterface): def has_legacy_image(self): return False + @property + def amd64_linux_manifest_digest(self): + """ Returns the digest of the AMD64+Linux manifest in this list, if any, or None + if none. 
+ """ + for manifest_ref in self._parsed[INDEX_MANIFESTS_KEY]: + platform = manifest_ref[INDEX_PLATFORM_KEY] + architecture = platform.get(INDEX_ARCHITECTURE_KEY, None) + os = platform.get(INDEX_OS_KEY, None) + if architecture == "amd64" and os == "linux": + return manifest_ref[INDEX_DIGEST_KEY] + + return None + def get_requires_empty_layer_blob(self, content_retriever): return False diff --git a/image/oci/manifest.py b/image/oci/manifest.py index 909c75263..b3e44b1c4 100644 --- a/image/oci/manifest.py +++ b/image/oci/manifest.py @@ -197,6 +197,10 @@ class OCIManifest(ManifestInterface): """ return self.filesystem_layers[-1] + @property + def config_media_type(self): + return self._parsed[OCI_MANIFEST_CONFIG_KEY][OCI_MANIFEST_MEDIATYPE_KEY] + @property def layers_compressed_size(self): return sum(layer.compressed_size for layer in self.filesystem_layers) diff --git a/image/oci/test/test_oci_index.py b/image/oci/test/test_oci_index.py index 84c8d747c..0825df0af 100644 --- a/image/oci/test/test_oci_index.py +++ b/image/oci/test/test_oci_index.py @@ -34,6 +34,35 @@ SAMPLE_INDEX = """{ }""" +SAMPLE_INDEX_NO_AMD = """{ + "schemaVersion": 2, + "manifests": [ + { + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "size": 7143, + "digest": "sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f", + "platform": { + "architecture": "ppc64le", + "os": "linux" + } + }, + { + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "size": 7682, + "digest": "sha256:5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270", + "platform": { + "architecture": "intel386", + "os": "linux" + } + } + ], + "annotations": { + "com.example.key1": "value1", + "com.example.key2": "value2" + } +}""" + + def test_parse_basic_index(): index = OCIIndex(Bytes.for_string_or_unicode(SAMPLE_INDEX)) assert index.is_manifest_list @@ -43,6 +72,10 @@ def test_parse_basic_index(): "sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f", "sha256:5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270", ] + assert ( + index.amd64_linux_manifest_digest + == "sha256:5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270" + ) def test_config_missing_required(): @@ -56,3 +89,15 @@ def test_config_missing_required(): def test_invalid_index(): with pytest.raises(MalformedIndex): OCIIndex(Bytes.for_string_or_unicode("{}")) + + +def test_index_without_amd(): + index = OCIIndex(Bytes.for_string_or_unicode(SAMPLE_INDEX_NO_AMD)) + assert index.is_manifest_list + assert index.digest == "sha256:a0ed0f2b3949bc731063320667062307faf4245f6872dc5bc98ee6ea5443f169" + assert index.local_blob_digests == [] + assert index.child_manifest_digests() == [ + "sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f", + "sha256:5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270", + ] + assert index.amd64_linux_manifest_digest is None diff --git a/image/shared/interfaces.py b/image/shared/interfaces.py index 661f840ae..bd158cb62 100644 --- a/image/shared/interfaces.py +++ b/image/shared/interfaces.py @@ -56,6 +56,12 @@ class ManifestInterface(object): Returns None if this cannot be computed locally. """ + @abstractproperty + def config_media_type(self): + """ Returns the media type of the config of this manifest or None if + this manifest does not support a configuration type. 
+ """ + @abstractmethod def validate(self, content_retriever): """ @@ -184,6 +190,19 @@ class ManifestInterface(object): """ +@add_metaclass(ABCMeta) +class ManifestListInterface(object): + """ + Defines the interface for the various manifest list types supported. + """ + + @abstractmethod + def amd64_linux_manifest_digest(self): + """ Returns the digest of the AMD64+Linux manifest in this list, if any, or None + if none. + """ + + @add_metaclass(ABCMeta) class ContentRetriever(object): """ diff --git a/initdb.py b/initdb.py index cd7802ffa..bd17a8481 100644 --- a/initdb.py +++ b/initdb.py @@ -174,6 +174,7 @@ def __create_manifest_and_tags( config = { "id": current_id, + "Size": len(content), } if parent_id: config["parent"] = parent_id @@ -1239,6 +1240,8 @@ WHITELISTED_EMPTY_MODELS = [ "LogEntry", "LogEntry2", "ManifestSecurityStatus", + "ManifestLegacyImage", + "Image", ] diff --git a/requirements-nover.txt b/requirements-nover.txt index 1450b97c6..0f2aa6565 100644 --- a/requirements-nover.txt +++ b/requirements-nover.txt @@ -34,6 +34,7 @@ geoip2 gevent gipc gunicorn +hashids hiredis html5lib==0.9999999 # pinned due to xhtml2pdf httmock diff --git a/requirements.txt b/requirements.txt index 0f85f228a..29c22d298 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,9 +68,9 @@ futures==3.1.1 geoip2==3.0.0 gevent==1.4.0 gipc==1.0.1 -gpg==1.10.0 greenlet==0.4.15 gunicorn==20.0.4 +hashids==1.2.0 hiredis==1.0.1 html5lib==1.0.1 httmock==1.3.0 diff --git a/static/directives/repo-view/image-tag-tooltip.html b/static/directives/repo-view/image-tag-tooltip.html deleted file mode 100644 index 3b1051d06..000000000 --- a/static/directives/repo-view/image-tag-tooltip.html +++ /dev/null @@ -1,11 +0,0 @@ -
-
- Image {{ tag.image_id.substr(0, 12) }} -
-
    -
  • {{ tag.name }}
  • -
-
and {{ imageMap[tag.image_id].length - 5 }} more tags
-
\ No newline at end of file diff --git a/static/directives/repo-view/manifest-tag-tooltip.html b/static/directives/repo-view/manifest-tag-tooltip.html new file mode 100644 index 000000000..73c204bc1 --- /dev/null +++ b/static/directives/repo-view/manifest-tag-tooltip.html @@ -0,0 +1,11 @@ +
+
+ Manifest {{ tag.manifest_digest.substr(7, 12) }} +
+
    +
  • {{ tag.name }}
  • +
+
and {{ manifestMap[tag.manifest_digest].length - 5 }} more tags
+
\ No newline at end of file diff --git a/static/directives/repo-view/repo-panel-tags.html b/static/directives/repo-view/repo-panel-tags.html index 935f46688..a2a0c9d0c 100644 --- a/static/directives/repo-view/repo-panel-tags.html +++ b/static/directives/repo-view/repo-panel-tags.html @@ -32,9 +32,9 @@ Commit SHAs -
- {{ ::it.image_id.substr(0, 12) }} +
+
@@ -116,16 +116,16 @@ style="width: 140px;"> Expires - - Manifest + Manifest - + @@ -167,14 +167,6 @@ See Child Manifests - - - - Unsupported - - @@ -198,11 +190,11 @@ + ng-if="manifestTracks.length > maxTrackCount"> - + + ng-if="::getTrackEntryForIndex(mt, $parent.$parent.$index)" + ng-class="::trackLineClass(mt, $parent.$parent.$parent.$index)" + ng-style="::{'borderColor': getTrackEntryForIndex(mt, $parent.$parent.$parent.$index).color}"> - + + ng-if="::getTrackEntryForIndex(mt, $parent.$parent.$parent.$parent.$index)" + ng-class="::trackLineExpandedClass(mt, $parent.$parent.$parent.$parent.$parent.$index)" + ng-style="::{'borderColor': getTrackEntryForIndex(mt, $parent.$parent.$parent.$parent.$parent.$index).color}"> @@ -320,12 +312,12 @@
- + + ng-if="::getTrackEntryForIndex(mt, $parent.$parent.$index)" + ng-class="::trackLineExpandedClass(mt, $parent.$parent.$parent.$index)" + ng-style="::{'borderColor': getTrackEntryForIndex(mt, $parent.$parent.$parent.$index).color}"> diff --git a/static/js/directives/repo-view/repo-panel-tags.js b/static/js/directives/repo-view/repo-panel-tags.js index 95dc671ed..580d3795c 100644 --- a/static/js/directives/repo-view/repo-panel-tags.js +++ b/static/js/directives/repo-view/repo-panel-tags.js @@ -89,78 +89,78 @@ angular.module('quay').directive('repoPanelTags', function () { } // Sort the tags by the predicate and the reverse, and map the information. - var imageIDs = []; var ordered = TableService.buildOrderedItems(allTags, $scope.options, - ['name'], ['last_modified_datetime', 'size']).entries; + ['name', 'manifest_digest'], ['last_modified_datetime', 'size']).entries; var checked = []; - var imageMap = {}; - var imageIndexMap = {}; + var manifestMap = {}; + var manifestIndexMap = {}; + var manifestDigests = []; for (var i = 0; i < ordered.length; ++i) { var tagInfo = ordered[i]; - if (!tagInfo.image_id) { + if (!tagInfo.manifest_digest) { continue; } - if (!imageMap[tagInfo.image_id]) { - imageMap[tagInfo.image_id] = []; - imageIDs.push(tagInfo.image_id) + if (!manifestMap[tagInfo.manifest_digest]) { + manifestMap[tagInfo.manifest_digest] = []; + manifestDigests.push(tagInfo.manifest_digest) } - imageMap[tagInfo.image_id].push(tagInfo); + manifestMap[tagInfo.manifest_digest].push(tagInfo); if ($.inArray(tagInfo.name, $scope.selectedTags) >= 0) { checked.push(tagInfo); } - if (!imageIndexMap[tagInfo.image_id]) { - imageIndexMap[tagInfo.image_id] = {'start': i, 'end': i}; + if (!manifestIndexMap[tagInfo.manifest_digest]) { + manifestIndexMap[tagInfo.manifest_digest] = {'start': i, 'end': i}; } - imageIndexMap[tagInfo.image_id]['end'] = i; + manifestIndexMap[tagInfo.manifest_digest]['end'] = i; }; // Calculate the image tracks. var colors = d3.scale.category10(); - if (Object.keys(imageMap).length > 10) { + if (Object.keys(manifestMap).length > 10) { colors = d3.scale.category20(); } - var imageTracks = []; - var imageTrackEntries = []; - var trackEntryForImage = {}; + var manifestTracks = []; + var manifestTrackEntries = []; + var trackEntryForManifest = {}; var visibleStartIndex = ($scope.options.page * $scope.tagsPerPage); var visibleEndIndex = (($scope.options.page + 1) * $scope.tagsPerPage); - imageIDs.sort().map(function(image_id) { - if (imageMap[image_id].length >= 2){ + manifestDigests.sort().map(function(manifest_digest) { + if (manifestMap[manifest_digest].length >= 2){ // Create the track entry. 
- var imageIndexRange = imageIndexMap[image_id]; - var colorIndex = imageTrackEntries.length; + var manifestIndexRange = manifestIndexMap[manifest_digest]; + var colorIndex = manifestTrackEntries.length; var trackEntry = { - 'image_id': image_id, + 'manifest_digest': manifest_digest, 'color': colors(colorIndex), - 'count': imageMap[image_id].length, - 'tags': imageMap[image_id], - 'index_range': imageIndexRange, - 'visible': visibleStartIndex <= imageIndexRange.end && imageIndexRange.start <= visibleEndIndex, + 'count': manifestMap[manifest_digest].length, + 'tags': manifestMap[manifest_digest], + 'index_range': manifestIndexRange, + 'visible': visibleStartIndex <= manifestIndexRange.end && manifestIndexRange.start <= visibleEndIndex, }; - trackEntryForImage[image_id] = trackEntry; - imageMap[image_id]['color'] = colors(colorIndex); + trackEntryForManifest[manifest_digest] = trackEntry; + manifestMap[manifest_digest]['color'] = colors(colorIndex); // Find the track in which we can place the entry, if any. var existingTrack = null; - for (var i = 0; i < imageTracks.length; ++i) { + for (var i = 0; i < manifestTracks.length; ++i) { // For the current track, ensure that the start and end index // for the current entry is outside of the range of the track's // entries. If so, then we can add the entry to the track. - var currentTrack = imageTracks[i]; + var currentTrack = manifestTracks[i]; var canAddToCurrentTrack = true; for (var j = 0; j < currentTrack.entries.length; ++j) { var currentTrackEntry = currentTrack.entries[j]; - var entryInfo = imageIndexMap[currentTrackEntry.image_id]; - if (Math.max(entryInfo.start, imageIndexRange.start) <= Math.min(entryInfo.end, imageIndexRange.end)) { + var entryInfo = manifestIndexMap[currentTrackEntry.image_id]; + if (Math.max(entryInfo.start, manifestIndexRange.start) <= Math.min(entryInfo.end, manifestIndexRange.end)) { canAddToCurrentTrack = false; break; } @@ -175,38 +175,38 @@ angular.module('quay').directive('repoPanelTags', function () { // Add the entry to the track or create a new track if necessary. 
if (existingTrack) { existingTrack.entries.push(trackEntry) - existingTrack.entryByImageId[image_id] = trackEntry; - existingTrack.endIndex = Math.max(existingTrack.endIndex, imageIndexRange.end); + existingTrack.entryByManifestDigest[manifest_digest] = trackEntry; + existingTrack.endIndex = Math.max(existingTrack.endIndex, manifestIndexRange.end); - for (var j = imageIndexRange.start; j <= imageIndexRange.end; j++) { + for (var j = manifestIndexRange.start; j <= manifestIndexRange.end; j++) { existingTrack.entryByIndex[j] = trackEntry; } } else { - var entryByImageId = {}; - entryByImageId[image_id] = trackEntry; + var entryByManifestDigest = {}; + entryByManifestDigest[manifest_digest] = trackEntry; var entryByIndex = {}; - for (var j = imageIndexRange.start; j <= imageIndexRange.end; j++) { + for (var j = manifestIndexRange.start; j <= manifestIndexRange.end; j++) { entryByIndex[j] = trackEntry; } - imageTracks.push({ + manifestTracks.push({ 'entries': [trackEntry], - 'entryByImageId': entryByImageId, - 'startIndex': imageIndexRange.start, - 'endIndex': imageIndexRange.end, + 'entryByManifestDigest': entryByManifestDigest, + 'startIndex': manifestIndexRange.start, + 'endIndex': manifestIndexRange.end, 'entryByIndex': entryByIndex, }); } - imageTrackEntries.push(trackEntry); + manifestTrackEntries.push(trackEntry); } }); - $scope.imageMap = imageMap; - $scope.imageTracks = imageTracks; - $scope.imageTrackEntries = imageTrackEntries; - $scope.trackEntryForImage = trackEntryForImage; + $scope.manifestMap = manifestMap; + $scope.manifestTracks = manifestTracks; + $scope.manifestTrackEntries = manifestTrackEntries; + $scope.trackEntryForManifest = trackEntryForManifest; $scope.options.page = 0; @@ -241,7 +241,7 @@ angular.module('quay').directive('repoPanelTags', function () { }); $scope.$watch('selectedTags', function(selectedTags) { - if (!selectedTags || !$scope.repository || !$scope.imageMap) { return; } + if (!selectedTags || !$scope.repository || !$scope.manifestMap) { return; } $scope.checkedTags.setChecked(selectedTags.map(function(tag) { return $scope.repositoryTags[tag]; @@ -410,8 +410,8 @@ angular.module('quay').directive('repoPanelTags', function () { return false; }; - $scope.imageIDFilter = function(image_id, tag) { - return tag.image_id == image_id; + $scope.manifestDigestFilter = function(manifest_digest, tag) { + return tag.manifest_digest == manifest_digest; }; $scope.setTab = function(tab) { @@ -420,7 +420,7 @@ angular.module('quay').directive('repoPanelTags', function () { $scope.selectTrack = function(it) { $scope.checkedTags.checkByFilter(function(tag) { - return $scope.imageIDFilter(it.image_id, tag); + return $scope.manifestDigestFilter(it.manifest_digest, tag); }); }; diff --git a/test/fixtures.py b/test/fixtures.py index ff235f6d0..762489de8 100644 --- a/test/fixtures.py +++ b/test/fixtures.py @@ -26,7 +26,6 @@ from endpoints.appr import appr_bp from endpoints.web import web from endpoints.v1 import v1_bp from endpoints.v2 import v2_bp -from endpoints.verbs import verbs as verbs_bp from endpoints.webhooks import webhooks from initdb import initialize_database, populate_database @@ -312,7 +311,6 @@ def app(appconfig, initialized_db): app.register_blueprint(api_bp, url_prefix="/api") app.register_blueprint(appr_bp, url_prefix="/cnr") app.register_blueprint(web, url_prefix="/") - app.register_blueprint(verbs_bp, url_prefix="/c1") app.register_blueprint(v1_bp, url_prefix="/v1") app.register_blueprint(v2_bp, url_prefix="/v2") app.register_blueprint(webhooks, 
url_prefix="/webhooks") diff --git a/test/registry/fixtures.py b/test/registry/fixtures.py index 2132c251b..106b53230 100644 --- a/test/registry/fixtures.py +++ b/test/registry/fixtures.py @@ -16,9 +16,8 @@ from app import storage from data.database import ( close_db_filter, configure, - DerivedStorageForImage, QueueItem, - Image, + ImageStorage, TagManifest, TagManifestToManifest, Manifest, @@ -30,6 +29,7 @@ from data.database import ( from data import model from data.registry_model import registry_model from endpoints.csrf import generate_csrf_token +from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST from util.log import logfile_path from test.registry.liveserverfixture import LiveServerExecutor @@ -46,15 +46,22 @@ def registry_server_executor(app): ) return "OK" - def delete_image(image_id): - image = Image.get(docker_image_id=image_id) - image.docker_image_id = "DELETED" - image.save() - return "OK" + def verify_replication_for(namespace, repo_name, tag_name): + repo_ref = registry_model.lookup_repository(namespace, repo_name) + assert repo_ref + + tag = registry_model.get_repo_tag(repo_ref, tag_name) + assert tag + + manifest = registry_model.get_manifest_for_tag(tag) + assert manifest + + for layer in registry_model.list_manifest_layers(manifest, storage): + if layer.blob.digest != EMPTY_LAYER_BLOB_DIGEST: + QueueItem.select().where( + QueueItem.queue_name ** ("%" + layer.blob.uuid + "%") + ).get() - def get_storage_replication_entry(image_id): - image = Image.get(docker_image_id=image_id) - QueueItem.select().where(QueueItem.queue_name ** ("%" + image.storage.uuid + "%")).get() return "OK" def set_feature(feature_name, value): @@ -81,10 +88,6 @@ def registry_server_executor(app): return jsonify({"old_value": old_value}) - def clear_derived_cache(): - DerivedStorageForImage.delete().execute() - return "OK" - def clear_uncompressed_size(image_id): image = model.image.get_image_by_id("devtable", "newrepo", image_id) image.storage.uncompressed_size = None @@ -158,11 +161,9 @@ def registry_server_executor(app): executor = LiveServerExecutor() executor.register("generate_csrf", generate_csrf) executor.register("set_supports_direct_download", set_supports_direct_download) - executor.register("delete_image", delete_image) - executor.register("get_storage_replication_entry", get_storage_replication_entry) + executor.register("verify_replication_for", verify_replication_for) executor.register("set_feature", set_feature) executor.register("set_config_key", set_config_key) - executor.register("clear_derived_cache", clear_derived_cache) executor.register("clear_uncompressed_size", clear_uncompressed_size) executor.register("add_token", add_token) executor.register("break_database", break_database) diff --git a/test/registry/protocol_v1.py b/test/registry/protocol_v1.py index 5f8d1ca6b..07a4e7cb8 100644 --- a/test/registry/protocol_v1.py +++ b/test/registry/protocol_v1.py @@ -153,6 +153,9 @@ class V1Protocol(RegistryProtocol): assert expected_failure == Failures.UNKNOWN_TAG return None + if expected_failure == Failures.UNKNOWN_TAG: + return None + tag_image_id = image_ids[tag_name] assert image_id_data.json() == tag_image_id @@ -331,7 +334,7 @@ class V1Protocol(RegistryProtocol): namespace, repo_name, tag_name, - image, + image_id, credentials=None, expected_failure=None, options=None, @@ -341,7 +344,7 @@ class V1Protocol(RegistryProtocol): session, "PUT", "/v1/repositories/%s/tags/%s" % (self.repo_name(namespace, repo_name), tag_name), - data='"%s"' % image.id, + data='"%s"' % image_id, 
auth=auth, expected_status=(200, expected_failure, V1ProtocolSteps.PUT_TAG), ) diff --git a/test/registry/registry_tests.py b/test/registry/registry_tests.py index 681c807d7..5052552c2 100644 --- a/test/registry/registry_tests.py +++ b/test/registry/registry_tests.py @@ -835,10 +835,11 @@ def test_image_replication( credentials=credentials, ) - # Ensure that entries were created for each image. - for image_id in list(result.image_ids.values()): - r = registry_server_executor.on(liveserver).get_storage_replication_entry(image_id) - assert r.text == "OK" + # Ensure that entries were created for each layer. + r = registry_server_executor.on(liveserver).verify_replication_for( + "devtable", "newrepo", "latest" + ) + assert r.text == "OK" def test_image_replication_empty_layers( @@ -872,10 +873,11 @@ def test_image_replication_empty_layers( credentials=credentials, ) - # Ensure that entries were created for each image. - for image_id in list(result.image_ids.values()): - r = registry_server_executor.on(liveserver).get_storage_replication_entry(image_id) - assert r.text == "OK" + # Ensure that entries were created for each layer. + r = registry_server_executor.on(liveserver).verify_replication_for( + "devtable", "newrepo", "latest" + ) + assert r.text == "OK" @pytest.mark.parametrize( @@ -1615,333 +1617,6 @@ def test_tags_disabled_namespace( ) -def test_squashed_image_disabled_namespace( - pusher, sized_images, liveserver_session, liveserver, registry_server_executor, app_reloader -): - """ Test: Attempting to pull a squashed image from a disabled namespace. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, "buynlarge", "newrepo", "latest", sized_images, credentials=credentials - ) - - # Disable the buynlarge namespace. - registry_server_executor.on(liveserver).disable_namespace("buynlarge") - - # Attempt to pull the squashed version. - response = liveserver_session.get("/c1/squash/buynlarge/newrepo/latest", auth=credentials) - assert response.status_code == 400 - - -def test_squashed_image_disabled_user( - pusher, sized_images, liveserver_session, liveserver, registry_server_executor, app_reloader -): - """ Test: Attempting to pull a squashed image via a disabled user. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, "buynlarge", "newrepo", "latest", sized_images, credentials=credentials - ) - - # Disable the devtable namespace. - registry_server_executor.on(liveserver).disable_namespace("devtable") - - # Attempt to pull the squashed version. - response = liveserver_session.get("/c1/squash/buynlarge/newrepo/latest", auth=credentials) - assert response.status_code == 403 - - -@pytest.mark.parametrize("use_estimates", [False, True,]) -def test_multilayer_squashed_images( - use_estimates, - pusher, - multi_layer_images, - liveserver_session, - liveserver, - registry_server_executor, - app_reloader, -): - """ Test: Pulling of multilayer, complex squashed images. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, - "devtable", - "newrepo", - "latest", - multi_layer_images, - credentials=credentials, - ) - - if use_estimates: - # Clear the uncompressed size stored for the images, to ensure that we estimate instead. - for image in multi_layer_images: - registry_server_executor.on(liveserver).clear_uncompressed_size(image.id) - - # Pull the squashed version. 
- response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 200 - - tar = tarfile.open(fileobj=BytesIO(response.content)) - - # Verify the squashed image. - expected_image_id = next( - (name for name in tar.getnames() if not "/" in name and name != "repositories") - ) - expected_names = [ - "repositories", - expected_image_id, - "%s/json" % expected_image_id, - "%s/VERSION" % expected_image_id, - "%s/layer.tar" % expected_image_id, - ] - - assert tar.getnames() == expected_names - - # Verify the JSON image data. - json_data = tar.extractfile(tar.getmember("%s/json" % expected_image_id)).read() - - # Ensure the JSON loads and parses. - result = json.loads(json_data) - assert result["id"] == expected_image_id - assert result["config"]["internal_id"] == "layer5" - - # Ensure that squashed layer tar can be opened. - tar = tarfile.open(fileobj=tar.extractfile(tar.getmember("%s/layer.tar" % expected_image_id))) - assert set(tar.getnames()) == {"contents", "file1", "file2", "file3", "file4"} - - # Check the contents of various files. - assert tar.extractfile("contents").read() == b"layer 5 contents" - assert tar.extractfile("file1").read() == b"from-layer-3" - assert tar.extractfile("file2").read() == b"from-layer-2" - assert tar.extractfile("file3").read() == b"from-layer-4" - assert tar.extractfile("file4").read() == b"from-layer-5" - - -@pytest.mark.parametrize("use_estimates", [False, True,]) -@pytest.mark.parametrize("is_readonly", [False, True,]) -def test_squashed_images( - use_estimates, - pusher, - sized_images, - liveserver_session, - is_readonly, - liveserver, - registry_server_executor, - app_reloader, -): - """ Test: Pulling of squashed images. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, "devtable", "newrepo", "latest", sized_images, credentials=credentials - ) - - if use_estimates: - # Clear the uncompressed size stored for the images, to ensure that we estimate instead. - for image in sized_images: - registry_server_executor.on(liveserver).clear_uncompressed_size(image.id) - - # Pull the squashed version. - with ConfigChange( - "REGISTRY_STATE", - "readonly" if is_readonly else "normal", - registry_server_executor.on(liveserver), - liveserver, - ): - response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 200 - - tar = tarfile.open(fileobj=BytesIO(response.content)) - - # Verify the squashed image. - expected_image_id = next( - (name for name in tar.getnames() if not "/" in name and name != "repositories") - ) - expected_names = [ - "repositories", - expected_image_id, - "%s/json" % expected_image_id, - "%s/VERSION" % expected_image_id, - "%s/layer.tar" % expected_image_id, - ] - - assert tar.getnames() == expected_names - - # Verify the JSON image data. - json_data = tar.extractfile(tar.getmember("%s/json" % expected_image_id)).read() - - # Ensure the JSON loads and parses. - result = json.loads(json_data) - assert result["id"] == expected_image_id - assert result["config"]["foo"] == "childbar" - - # Ensure that squashed layer tar can be opened. - tar = tarfile.open( - fileobj=tar.extractfile(tar.getmember("%s/layer.tar" % expected_image_id)) - ) - assert tar.getnames() == ["contents"] - - # Check the contents. 
- assert tar.extractfile("contents").read() == b"some contents" - - -EXPECTED_ACI_MANIFEST = { - "acKind": "ImageManifest", - "app": { - "environment": [], - "mountPoints": [], - "group": "root", - "user": "root", - "workingDirectory": "/", - "exec": ["/bin/sh", "-c", '""hello""'], - "isolators": [], - "eventHandlers": [], - "ports": [], - "annotations": [ - {"name": "created", "value": "2018-04-03T18:37:09.284840891Z"}, - {"name": "homepage", "value": "http://localhost:5000/devtable/newrepo:latest"}, - {"name": "quay.io/derived-image", "value": "DERIVED_IMAGE_ID"}, - ], - }, - "labels": [ - {"name": "version", "value": "latest"}, - {"name": "arch", "value": "amd64"}, - {"name": "os", "value": "linux"}, - ], - "acVersion": "0.6.1", - "name": "localhost/devtable/newrepo", -} - - -@pytest.mark.parametrize("is_readonly", [False, True,]) -def test_aci_conversion( - pusher, - sized_images, - liveserver_session, - is_readonly, - liveserver, - registry_server_executor, - app_reloader, -): - """ Test: Pulling of ACI converted images. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, "devtable", "newrepo", "latest", sized_images, credentials=credentials - ) - - # Pull the ACI version. - with ConfigChange( - "REGISTRY_STATE", - "readonly" if is_readonly else "normal", - registry_server_executor.on(liveserver), - liveserver, - ): - response = liveserver_session.get( - "/c1/aci/server_name/devtable/newrepo/latest/aci/linux/amd64", auth=credentials - ) - assert response.status_code == 200 - tar = tarfile.open(fileobj=BytesIO(response.content)) - assert set(tar.getnames()) == {"manifest", "rootfs", "rootfs/contents"} - - assert tar.extractfile("rootfs/contents").read() == b"some contents" - loaded = json.loads(tar.extractfile("manifest").read()) - for annotation in loaded["app"]["annotations"]: - if annotation["name"] == "quay.io/derived-image": - annotation["value"] = "DERIVED_IMAGE_ID" - - assert loaded == EXPECTED_ACI_MANIFEST - - if not is_readonly: - # Wait for the ACI signature to be written. - time.sleep(1) - - # Pull the ACI signature. - response = liveserver_session.get( - "/c1/aci/server_name/devtable/newrepo/latest/aci.asc/linux/amd64", auth=credentials - ) - assert response.status_code == 200 - - -@pytest.mark.parametrize("schema_version", [1, 2,]) -def test_aci_conversion_manifest_list( - v22_protocol, - sized_images, - different_images, - liveserver_session, - data_model, - liveserver, - registry_server_executor, - app_reloader, - schema_version, -): - """ Test: Pulling of ACI converted image from a manifest list. """ - credentials = ("devtable", "password") - options = ProtocolOptions() - - # Build the manifests that will go in the list. - blobs = {} - - signed = v22_protocol.build_schema1( - "devtable", "newrepo", "latest", sized_images, blobs, options, arch="amd64" - ) - first_manifest = signed.unsigned() - if schema_version == 2: - first_manifest = v22_protocol.build_schema2(sized_images, blobs, options) - - second_manifest = v22_protocol.build_schema2(different_images, blobs, options) - - # Create and push the manifest list. 
- builder = DockerSchema2ManifestListBuilder() - builder.add_manifest(first_manifest, "amd64", "linux") - builder.add_manifest(second_manifest, "arm", "linux") - manifestlist = builder.build() - - v22_protocol.push_list( - liveserver_session, - "devtable", - "newrepo", - "latest", - manifestlist, - [first_manifest, second_manifest], - blobs, - credentials=credentials, - options=options, - ) - - # Pull the ACI version. - response = liveserver_session.get( - "/c1/aci/server_name/devtable/newrepo/latest/aci/linux/amd64", auth=credentials - ) - assert response.status_code == 200 - tar = tarfile.open(fileobj=BytesIO(response.content)) - assert set(tar.getnames()) == {"manifest", "rootfs", "rootfs/contents"} - - assert tar.extractfile("rootfs/contents").read() == b"some contents" - - loaded = json.loads(tar.extractfile("manifest").read()) - for annotation in loaded["app"]["annotations"]: - if annotation["name"] == "quay.io/derived-image": - annotation["value"] = "DERIVED_IMAGE_ID" - - assert loaded == EXPECTED_ACI_MANIFEST - - # Wait for the ACI signature to be written. - time.sleep(1) - - # Pull the ACI signature. - response = liveserver_session.get( - "/c1/aci/server_name/devtable/newrepo/latest/aci.asc/linux/amd64", auth=credentials - ) - assert response.status_code == 200 - - @pytest.mark.parametrize( "push_user, push_namespace, push_repo, mount_repo_name, expected_failure", [ @@ -2323,10 +1998,8 @@ def test_push_pull_same_blobs(pusher, puller, liveserver_session, app_reloader): ) -def test_push_tag_existing_image( - v1_protocol, puller, basic_images, liveserver_session, app_reloader -): - """ Test: Push a new tag on an existing manifest/image. """ +def test_push_tag_existing_image(v1_protocol, basic_images, liveserver_session, app_reloader): + """ Test: Push a new tag on an existing image. """ credentials = ("devtable", "password") # Push a new repository. @@ -2334,18 +2007,24 @@ def test_push_tag_existing_image( liveserver_session, "devtable", "newrepo", "latest", basic_images, credentials=credentials ) - # Push the same image/manifest to another tag in the repository. + # Pull the repository to verify. + pulled = v1_protocol.pull( + liveserver_session, "devtable", "newrepo", "latest", basic_images, credentials=credentials, + ) + assert pulled.image_ids + + # Push the same image to another tag in the repository. v1_protocol.tag( liveserver_session, "devtable", "newrepo", "anothertag", - basic_images[-1], + pulled.image_ids["latest"], credentials=credentials, ) # Pull the repository to verify. - puller.pull( + v1_protocol.pull( liveserver_session, "devtable", "newrepo", @@ -2655,131 +2334,6 @@ def test_push_pull_manifest_list_duplicate_manifest( ) -def test_squashed_images_empty_layer( - pusher, - images_with_empty_layer, - liveserver_session, - liveserver, - registry_server_executor, - app_reloader, -): - """ Test: Pulling of squashed images for a manifest with empty layers. """ - credentials = ("devtable", "password") - - # Push an image to download. - pusher.push( - liveserver_session, - "devtable", - "newrepo", - "latest", - images_with_empty_layer, - credentials=credentials, - ) - - # Pull the squashed version. - response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 200 - - tar = tarfile.open(fileobj=BytesIO(response.content)) - - # Verify the squashed image. 
- expected_image_id = next( - (name for name in tar.getnames() if not "/" in name and name != "repositories") - ) - expected_names = [ - "repositories", - expected_image_id, - "%s/json" % expected_image_id, - "%s/VERSION" % expected_image_id, - "%s/layer.tar" % expected_image_id, - ] - - assert tar.getnames() == expected_names - - -def test_squashed_image_unsupported( - v22_protocol, basic_images, liveserver_session, liveserver, app_reloader, data_model -): - """ Test: Attempting to pull a squashed image for a manifest list without an amd64+linux entry. - """ - credentials = ("devtable", "password") - options = ProtocolOptions() - - # Build the manifest that will go in the list. - blobs = {} - manifest = v22_protocol.build_schema2(basic_images, blobs, options) - - # Create and push the manifest list. - builder = DockerSchema2ManifestListBuilder() - builder.add_manifest(manifest, "foobar", "someos") - manifestlist = builder.build() - - v22_protocol.push_list( - liveserver_session, - "devtable", - "newrepo", - "latest", - manifestlist, - [manifest], - blobs, - credentials=credentials, - options=options, - ) - - # Attempt to pull the squashed version. - response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 404 - - -def test_squashed_image_manifest_list( - v22_protocol, basic_images, liveserver_session, liveserver, app_reloader, data_model -): - """ Test: Pull a squashed image for a manifest list with an amd64+linux entry. - """ - credentials = ("devtable", "password") - options = ProtocolOptions() - - # Build the manifest that will go in the list. - blobs = {} - manifest = v22_protocol.build_schema2(basic_images, blobs, options) - - # Create and push the manifest list. - builder = DockerSchema2ManifestListBuilder() - builder.add_manifest(manifest, "amd64", "linux") - manifestlist = builder.build() - - v22_protocol.push_list( - liveserver_session, - "devtable", - "newrepo", - "latest", - manifestlist, - [manifest], - blobs, - credentials=credentials, - options=options, - ) - - # Pull the squashed version. - response = liveserver_session.get("/c1/squash/devtable/newrepo/latest", auth=credentials) - assert response.status_code == 200 - - # Verify the squashed image. - tar = tarfile.open(fileobj=BytesIO(response.content)) - expected_image_id = next( - (name for name in tar.getnames() if not "/" in name and name != "repositories") - ) - expected_names = [ - "repositories", - expected_image_id, - "%s/json" % expected_image_id, - "%s/VERSION" % expected_image_id, - "%s/layer.tar" % expected_image_id, - ] - - assert tar.getnames() == expected_names - - def test_verify_schema2( v22_protocol, basic_images, liveserver_session, liveserver, app_reloader, data_model ): diff --git a/test/test_api_usage.py b/test/test_api_usage.py index c18b13c5a..8c102f6b7 100644 --- a/test/test_api_usage.py +++ b/test/test_api_usage.py @@ -2444,7 +2444,6 @@ class TestDeleteRepository(ApiTestCase): # Make sure the repository has some images and tags. repo_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, "complex") - self.assertTrue(len(list(registry_model.get_legacy_images(repo_ref))) > 0) self.assertTrue(len(list(registry_model.list_all_active_repository_tags(repo_ref))) > 0) # Add some data for the repository, in addition to is already existing images and tags. 
@@ -2525,11 +2524,11 @@ class TestGetRepository(ApiTestCase): self.login(ADMIN_ACCESS_USER) # base + repo + is_starred + tags - with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 4 + 1): + with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 4): self.getJsonResponse(Repository, params=dict(repository=ADMIN_ACCESS_USER + "/simple")) # base + repo + is_starred + tags - with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 4 + 1): + with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 4): json = self.getJsonResponse( Repository, params=dict(repository=ADMIN_ACCESS_USER + "/gargantuan") ) @@ -3326,8 +3325,7 @@ class TestListAndDeleteTag(ApiTestCase): params=dict(repository=ADMIN_ACCESS_USER + "/complex", tag="sometag"), ) - sometag_images = json["images"] - self.assertEqual(sometag_images, staging_images) + assert json["images"] # Move the tag. self.putResponse( @@ -3344,8 +3342,7 @@ class TestListAndDeleteTag(ApiTestCase): ) sometag_new_images = json["images"] - self.assertEqual(1, len(sometag_new_images)) - self.assertEqual(staging_images[-1], sometag_new_images[0]) + assert sometag_new_images def test_deletesubtag(self): self.login(ADMIN_ACCESS_USER) @@ -3384,7 +3381,7 @@ class TestListAndDeleteTag(ApiTestCase): self.login(ADMIN_ACCESS_USER) repo_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, "simple") - latest_tag = registry_model.get_repo_tag(repo_ref, "latest", include_legacy_image=True) + latest_tag = registry_model.get_repo_tag(repo_ref, "latest") # Create 8 tags in the simple repo. remaining_tags = {"latest", "prod"} @@ -3392,7 +3389,7 @@ class TestListAndDeleteTag(ApiTestCase): tag_name = "tag" + str(i) remaining_tags.add(tag_name) assert registry_model.retarget_tag( - repo_ref, tag_name, latest_tag.legacy_image, storage, docker_v2_signing_key + repo_ref, tag_name, latest_tag.manifest, storage, docker_v2_signing_key ) # Make sure we can iterate over all of them. 
diff --git a/test/test_secscan.py b/test/test_secscan.py index 9bf182c1c..59d7f497d 100644 --- a/test/test_secscan.py +++ b/test/test_secscan.py @@ -2,44 +2,26 @@ import json import time import unittest -from app import app, storage, notification_queue, url_scheme_and_hostname +from app import app, storage, url_scheme_and_hostname from data import model from data.registry_model import registry_model -from data.database import Image, IMAGE_NOT_SCANNED_ENGINE_VERSION -from endpoints.v2 import v2_bp +from data.database import Image, ManifestLegacyImage from initdb import setup_database_for_testing, finished_database_for_testing -from notifications.notificationevent import VulnerabilityFoundEvent from util.secscan.secscan_util import get_blob_download_uri_getter -from util.morecollections import AttrDict from util.secscan.api import SecurityScannerAPI, APIRequestFailure -from util.secscan.analyzer import LayerAnalyzer from util.secscan.fake import fake_security_scanner -from util.secscan.notifier import SecurityNotificationHandler, ProcessNotificationPageResult from util.security.instancekeys import InstanceKeys -from workers.security_notification_worker import SecurityNotificationWorker ADMIN_ACCESS_USER = "devtable" SIMPLE_REPO = "simple" -COMPLEX_REPO = "complex" - - -def process_notification_data(legacy_api, notification_data): - handler = SecurityNotificationHandler(legacy_api, 100) - result = handler.process_notification_page_data(notification_data) - handler.send_notifications() - return result == ProcessNotificationPageResult.FINISHED_PROCESSING def _get_legacy_image(namespace, repo, tag, include_storage=True): repo_ref = registry_model.lookup_repository(namespace, repo) - repo_tag = registry_model.get_repo_tag(repo_ref, tag, include_legacy_image=True) - return Image.get(id=repo_tag.legacy_image._db_id) - - -def _delete_tag(namespace, repo, tag): - repo_ref = registry_model.lookup_repository(namespace, repo) - registry_model.delete_tag(repo_ref, tag) + repo_tag = registry_model.get_repo_tag(repo_ref, tag) + manifest = registry_model.get_manifest_for_tag(repo_tag) + return ManifestLegacyImage.get(manifest_id=manifest._db_id).image class TestSecurityScanner(unittest.TestCase): @@ -93,785 +75,24 @@ class TestSecurityScanner(unittest.TestCase): """ Test for basic retrieval of layers from the security scanner. """ - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) + + repo_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) + repo_tag = registry_model.get_repo_tag(repo_ref, "latest") + manifest = registry_model.get_manifest_for_tag(repo_tag) + registry_model.populate_legacy_images_for_testing(manifest, storage) with fake_security_scanner() as security_scanner: # Ensure the layer doesn't exist yet. - self.assertFalse(security_scanner.has_layer(security_scanner.layer_id(layer))) - self.assertIsNone(self.api.get_layer_data(layer)) + self.assertFalse(security_scanner.has_layer(security_scanner.layer_id(manifest))) + self.assertIsNone(self.api.get_layer_data(manifest)) # Add the layer. - security_scanner.add_layer(security_scanner.layer_id(layer)) + security_scanner.add_layer(security_scanner.layer_id(manifest)) # Retrieve the results. 
- result = self.api.get_layer_data(layer, include_vulnerabilities=True) + result = self.api.get_layer_data(manifest, include_vulnerabilities=True) self.assertIsNotNone(result) - self.assertEqual(result["Layer"]["Name"], security_scanner.layer_id(layer)) - - def test_analyze_layer_nodirectdownload_success(self): - """ - Tests analyzing a layer when direct download is disabled. - """ - - # Disable direct download in fake storage. - storage.put_content(["local_us"], "supports_direct_download", b"false") - - try: - app.register_blueprint(v2_bp, url_prefix="/v2") - except: - # Already registered. - pass - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - # Ensure that the download is a registry+JWT download. - uri, auth_header = self.api._get_image_url_and_auth(layer) - self.assertIsNotNone(uri) - self.assertIsNotNone(auth_header) - - # Ensure the download doesn't work without the header. - rv = self.app.head(uri) - self.assertEqual(rv.status_code, 401) - - # Ensure the download works with the header. Note we use a HEAD here, as GET causes DB - # access which messes with the test runner's rollback. - rv = self.app.head(uri, headers=[("authorization", auth_header)]) - self.assertEqual(rv.status_code, 200) - - # Ensure the code works when called via analyze. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - def test_analyze_layer_success(self): - """ - Tests that analyzing a layer successfully marks it as analyzed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - def test_analyze_layer_failure(self): - """ - Tests that failing to analyze a layer (because it 422s) marks it as analyzed but failed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - security_scanner.set_fail_layer_id(security_scanner.layer_id(layer)) - - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def test_analyze_layer_internal_error(self): - """ - Tests that failing to analyze a layer (because it 500s) marks it as not analyzed. 
- """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - security_scanner.set_internal_error_layer_id(security_scanner.layer_id(layer)) - - analyzer = LayerAnalyzer(app.config, self.api) - with self.assertRaises(APIRequestFailure): - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, -1) - - def test_analyze_layer_error(self): - """ - Tests that failing to analyze a layer (because it 400s) marks it as analyzed but failed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Make is so trying to analyze the parent will fail with an error. - security_scanner.set_error_layer_id(security_scanner.layer_id(layer.parent)) - - # Try to the layer and its parents, but with one request causing an error. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - # Make sure it is marked as analyzed, but in a failed state. - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def test_analyze_layer_unexpected_status(self): - """ - Tests that a response from a scanner with an unexpected status code fails correctly. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Make is so trying to analyze the parent will fail with an error. - security_scanner.set_unexpected_status_layer_id(security_scanner.layer_id(layer.parent)) - - # Try to the layer and its parents, but with one request causing an error. - analyzer = LayerAnalyzer(app.config, self.api) - with self.assertRaises(APIRequestFailure): - analyzer.analyze_recursively(layer) - - # Make sure it isn't analyzed. - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, -1) - - def test_analyze_layer_missing_parent_handled(self): - """ - Tests that a missing parent causes an automatic reanalysis, which succeeds. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Analyze the layer and its parents. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - # Make sure it was analyzed. - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Mark the layer as not yet scanned. - layer.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION - layer.security_indexed = False - layer.save() - - # Remove the layer's parent entirely from the security scanner. - security_scanner.remove_layer(security_scanner.layer_id(layer.parent)) - - # Analyze again, which should properly re-analyze the missing parent and this layer. 
- analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - def test_analyze_layer_invalid_parent(self): - """ - Tests that trying to reanalyze a parent that is invalid causes the layer to be marked as - analyzed, but failed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Analyze the layer and its parents. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - # Make sure it was analyzed. - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Mark the layer as not yet scanned. - layer.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION - layer.security_indexed = False - layer.save() - - # Remove the layer's parent entirely from the security scanner. - security_scanner.remove_layer(security_scanner.layer_id(layer.parent)) - - # Make is so trying to analyze the parent will fail. - security_scanner.set_error_layer_id(security_scanner.layer_id(layer.parent)) - - # Try to analyze again, which should try to reindex the parent and fail. - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def test_analyze_layer_unsupported_parent(self): - """ - Tests that attempting to analyze a layer whose parent is unanalyzable, results in the layer - being marked as analyzed, but failed. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - with fake_security_scanner() as security_scanner: - # Make is so trying to analyze the parent will fail. - security_scanner.set_fail_layer_id(security_scanner.layer_id(layer.parent)) - - # Attempt to the layer and its parents. This should mark the layer itself as unanalyzable. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def test_analyze_layer_missing_storage(self): - """ - Tests trying to analyze a layer with missing storage. - """ - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - # Delete the storage for the layer. 
- path = model.storage.get_layer_path(layer.storage) - locations = app.config["DISTRIBUTED_STORAGE_PREFERENCE"] - storage.remove(locations, path) - storage.remove(locations, "all_files_exist") - - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, False, 1) - - def assert_analyze_layer_notify( - self, security_indexed_engine, security_indexed, expect_notification - ): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - self.assertFalse(layer.security_indexed) - self.assertEqual(-1, layer.security_indexed_engine) - - # Ensure there are no existing events. - self.assertIsNone(notification_queue.get()) - - # Add a repo event for the layer. - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Update the layer's state before analyzing. - layer.security_indexed_engine = security_indexed_engine - layer.security_indexed = security_indexed - layer.save() - - with fake_security_scanner() as security_scanner: - security_scanner.set_vulns( - security_scanner.layer_id(layer), - [ - { - "Name": "CVE-2014-9471", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Low", - "FixedBy": "9.23-5", - }, - { - "Name": "CVE-2016-7530", - "Namespace": "debian:8", - "Description": "Some other service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2016-7530", - "Severity": "Unknown", - "FixedBy": "19.343-2", - }, - ], - ) - - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Ensure an event was written for the tag (if necessary). - time.sleep(1) - queue_item = notification_queue.get() - - if expect_notification: - self.assertIsNotNone(queue_item) - - body = json.loads(queue_item.body) - self.assertEqual(set(["latest", "prod"]), set(body["event_data"]["tags"])) - self.assertEqual("CVE-2014-9471", body["event_data"]["vulnerability"]["id"]) - self.assertEqual("Low", body["event_data"]["vulnerability"]["priority"]) - self.assertTrue(body["event_data"]["vulnerability"]["has_fix"]) - - self.assertEqual("CVE-2014-9471", body["event_data"]["vulnerabilities"][0]["id"]) - self.assertEqual(2, len(body["event_data"]["vulnerabilities"])) - - # Ensure we get the correct event message out as well. - event = VulnerabilityFoundEvent() - msg = "1 Low and 1 more vulnerabilities were detected in repository devtable/simple in 2 tags" - self.assertEqual(msg, event.get_summary(body["event_data"], {})) - self.assertEqual("info", event.get_level(body["event_data"], {})) - else: - self.assertIsNone(queue_item) - - # Ensure its security indexed engine was updated. 
- updated_layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertEquals(updated_layer.id, layer.id) - self.assertTrue(updated_layer.security_indexed_engine > 0) - - def test_analyze_layer_success_events(self): - # Not previously indexed at all => Notification - self.assert_analyze_layer_notify(IMAGE_NOT_SCANNED_ENGINE_VERSION, False, True) - - def test_analyze_layer_success_no_notification(self): - # Previously successfully indexed => No notification - self.assert_analyze_layer_notify(0, True, False) - - def test_analyze_layer_failed_then_success_notification(self): - # Previously failed to index => Notification - self.assert_analyze_layer_notify(0, False, True) - - def test_notification_new_layers_not_vulnerable(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer_id = "%s.%s" % (layer.docker_image_id, layer.storage.uuid) - - # Add a repo event for the layer. - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification([layer_id], [], {}, {}) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - def test_notification_delete(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer_id = "%s.%s" % (layer.docker_image_id, layer.storage.uuid) - - # Add a repo event for the layer. - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification([layer_id], None, {}, None) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - def test_notification_new_layers(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer_id = "%s.%s" % (layer.docker_image_id, layer.storage.uuid) - - # Add a repo event for the layer. 
- repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Low", - "FixedIn": {"Version": "9.23-5"}, - } - security_scanner.set_vulns(layer_id, [vuln_info]) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification( - [], [layer_id], vuln_info, vuln_info - ) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure an event was written for the tag. - time.sleep(1) - queue_item = notification_queue.get() - self.assertIsNotNone(queue_item) - - item_body = json.loads(queue_item.body) - self.assertEqual(sorted(["prod", "latest"]), sorted(item_body["event_data"]["tags"])) - self.assertEqual("CVE-TEST", item_body["event_data"]["vulnerability"]["id"]) - self.assertEqual("Low", item_body["event_data"]["vulnerability"]["priority"]) - self.assertTrue(item_body["event_data"]["vulnerability"]["has_fix"]) - - def test_notification_no_new_layers(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - - # Add a repo event for the layer. - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification([], [], {}, {}) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - def notification_tuple(self, notification): - # TODO: Replace this with a method once we refactor the notification stuff into its - # own module. - return AttrDict( - { - "event_config_dict": json.loads(notification.event_config_json), - "method_config_dict": json.loads(notification.config_json), - } - ) - - def test_notification_no_new_layers_increased_severity(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer_id = "%s.%s" % (layer.docker_image_id, layer.storage.uuid) - - # Add a repo event for the layer. 
- repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - notification = model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - # Fire off the notification processing. - with fake_security_scanner() as security_scanner: - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - - old_vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Low", - } - - new_vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Critical", - "FixedIn": {"Version": "9.23-5"}, - } - - security_scanner.set_vulns(layer_id, [new_vuln_info]) - - # Add a notification for the layer. - notification_data = security_scanner.add_notification( - [layer_id], [layer_id], old_vuln_info, new_vuln_info - ) - - # Process the notification. - self.assertTrue(process_notification_data(self.api, notification_data)) - - # Ensure an event was written for the tag. - time.sleep(1) - queue_item = notification_queue.get() - self.assertIsNotNone(queue_item) - - item_body = json.loads(queue_item.body) - self.assertEqual(sorted(["prod", "latest"]), sorted(item_body["event_data"]["tags"])) - self.assertEqual("CVE-TEST", item_body["event_data"]["vulnerability"]["id"]) - self.assertEqual("Critical", item_body["event_data"]["vulnerability"]["priority"]) - self.assertTrue(item_body["event_data"]["vulnerability"]["has_fix"]) - - # Verify that an event would be raised. - event_data = item_body["event_data"] - notification = self.notification_tuple(notification) - self.assertTrue(VulnerabilityFoundEvent().should_perform(event_data, notification)) - - # Create another notification with a matching level and verify it will be raised. - notification = model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 1} - ) - - notification = self.notification_tuple(notification) - self.assertTrue(VulnerabilityFoundEvent().should_perform(event_data, notification)) - - # Create another notification with a higher level and verify it won't be raised. - notification = model.notification.create_repo_notification( - repo, "vulnerability_found", "quay_notification", {}, {"level": 0} - ) - notification = self.notification_tuple(notification) - self.assertFalse(VulnerabilityFoundEvent().should_perform(event_data, notification)) - - def test_select_images_to_scan(self): - # Set all images to have a security index of a version to that of the config. - expected_version = app.config["SECURITY_SCANNER_ENGINE_VERSION_TARGET"] - Image.update(security_indexed_engine=expected_version).execute() - - # Ensure no images are available for scanning. - self.assertIsNone(model.image.get_min_id_for_sec_scan(expected_version)) - self.assertTrue(len(model.image.get_images_eligible_for_scan(expected_version)) == 0) - - # Check for a higher version. 
- self.assertIsNotNone(model.image.get_min_id_for_sec_scan(expected_version + 1)) - self.assertTrue(len(model.image.get_images_eligible_for_scan(expected_version + 1)) > 0) - - def test_notification_worker(self): - layer1 = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer2 = _get_legacy_image(ADMIN_ACCESS_USER, COMPLEX_REPO, "prod", include_storage=True) - - # Add a repo events for the layers. - simple_repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - complex_repo = model.repository.get_repository(ADMIN_ACCESS_USER, COMPLEX_REPO) - - model.notification.create_repo_notification( - simple_repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - model.notification.create_repo_notification( - complex_repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. - self.assertIsNone(notification_queue.get()) - - with fake_security_scanner() as security_scanner: - # Test with an unknown notification. - worker = SecurityNotificationWorker(None) - self.assertFalse(worker.perform_notification_work({"Name": "unknownnotification"})) - - # Add some analyzed layers. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer1) - analyzer.analyze_recursively(layer2) - - # Add a notification with pages of data. - new_vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Critical", - "FixedIn": {"Version": "9.23-5"}, - } - - security_scanner.set_vulns(security_scanner.layer_id(layer1), [new_vuln_info]) - security_scanner.set_vulns(security_scanner.layer_id(layer2), [new_vuln_info]) - - layer_ids = [security_scanner.layer_id(layer1), security_scanner.layer_id(layer2)] - notification_data = security_scanner.add_notification( - [], layer_ids, None, new_vuln_info - ) - - # Test with a known notification with pages. - data = { - "Name": notification_data["Name"], - } - - worker = SecurityNotificationWorker(None) - self.assertTrue(worker.perform_notification_work(data, layer_limit=2)) - - # Make sure all pages were processed by ensuring we have two notifications. - time.sleep(1) - self.assertIsNotNone(notification_queue.get()) - self.assertIsNotNone(notification_queue.get()) - - def test_notification_worker_offset_pages_not_indexed(self): - # Try without indexes. - self.assert_notification_worker_offset_pages(indexed=False) - - def test_notification_worker_offset_pages_indexed(self): - # Try with indexes. - self.assert_notification_worker_offset_pages(indexed=True) - - def assert_notification_worker_offset_pages(self, indexed=False): - layer1 = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - layer2 = _get_legacy_image(ADMIN_ACCESS_USER, COMPLEX_REPO, "prod", include_storage=True) - - # Add a repo events for the layers. - simple_repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - complex_repo = model.repository.get_repository(ADMIN_ACCESS_USER, COMPLEX_REPO) - - model.notification.create_repo_notification( - simple_repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - model.notification.create_repo_notification( - complex_repo, "vulnerability_found", "quay_notification", {}, {"level": 100} - ) - - # Ensure that there are no event queue items for the layer. 
- self.assertIsNone(notification_queue.get()) - - with fake_security_scanner() as security_scanner: - # Test with an unknown notification. - worker = SecurityNotificationWorker(None) - self.assertFalse(worker.perform_notification_work({"Name": "unknownnotification"})) - - # Add some analyzed layers. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer1) - analyzer.analyze_recursively(layer2) - - # Add a notification with pages of data. - new_vuln_info = { - "Name": "CVE-TEST", - "Namespace": "debian:8", - "Description": "Some service", - "Link": "https://security-tracker.debian.org/tracker/CVE-2014-9471", - "Severity": "Critical", - "FixedIn": {"Version": "9.23-5"}, - } - - security_scanner.set_vulns(security_scanner.layer_id(layer1), [new_vuln_info]) - security_scanner.set_vulns(security_scanner.layer_id(layer2), [new_vuln_info]) - - # Define offsetting sets of layer IDs, to test cross-pagination support. In this test, we - # will only serve 2 layer IDs per page: the first page will serve both of the 'New' layer IDs, - # but since the first 2 'Old' layer IDs are "earlier" than the shared ID of - # `devtable/simple:latest`, they won't get served in the 'New' list until the *second* page. - # The notification handling system should correctly not notify for this layer, even though it - # is marked 'New' on page 1 and marked 'Old' on page 2. Clair will served these - # IDs sorted in the same manner. - idx_old_layer_ids = [ - {"LayerName": "old1", "Index": 1}, - {"LayerName": "old2", "Index": 2}, - {"LayerName": security_scanner.layer_id(layer1), "Index": 3}, - ] - - idx_new_layer_ids = [ - {"LayerName": security_scanner.layer_id(layer1), "Index": 3}, - {"LayerName": security_scanner.layer_id(layer2), "Index": 4}, - ] - - old_layer_ids = [t["LayerName"] for t in idx_old_layer_ids] - new_layer_ids = [t["LayerName"] for t in idx_new_layer_ids] - - if not indexed: - idx_old_layer_ids = None - idx_new_layer_ids = None - - notification_data = security_scanner.add_notification( - old_layer_ids, - new_layer_ids, - None, - new_vuln_info, - max_per_page=2, - indexed_old_layer_ids=idx_old_layer_ids, - indexed_new_layer_ids=idx_new_layer_ids, - ) - - # Test with a known notification with pages. - data = { - "Name": notification_data["Name"], - } - - worker = SecurityNotificationWorker(None) - self.assertTrue(worker.perform_notification_work(data, layer_limit=2)) - - # Make sure all pages were processed by ensuring we have only one notification. If the second - # page was not processed, then the `Old` entry for layer1 will not be found, and we'd get two - # notifications. - time.sleep(1) - self.assertIsNotNone(notification_queue.get()) - self.assertIsNone(notification_queue.get()) - - def test_layer_gc(self): - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest", include_storage=True) - - # Delete the prod tag so that only the `latest` tag remains. - _delete_tag(ADMIN_ACCESS_USER, SIMPLE_REPO, "prod") - - with fake_security_scanner() as security_scanner: - # Analyze the layer. - analyzer = LayerAnalyzer(app.config, self.api) - analyzer.analyze_recursively(layer) - - layer = _get_legacy_image(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - self.assertAnalyzed(layer, security_scanner, True, 1) - self.assertTrue(security_scanner.has_layer(security_scanner.layer_id(layer))) - - namespace_user = model.user.get_user(ADMIN_ACCESS_USER) - model.user.change_user_tag_expiration(namespace_user, 0) - - # Delete the tag in the repository and GC. 
- _delete_tag(ADMIN_ACCESS_USER, SIMPLE_REPO, "latest") - time.sleep(1) - - repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) - model.gc.garbage_collect_repo(repo) - - # Ensure that the security scanner no longer has the image. - self.assertFalse(security_scanner.has_layer(security_scanner.layer_id(layer))) + self.assertEquals(result["Layer"]["Name"], security_scanner.layer_id(manifest)) if __name__ == "__main__": diff --git a/test/testconfig.py b/test/testconfig.py index 301f7f71c..a9a1aac71 100644 --- a/test/testconfig.py +++ b/test/testconfig.py @@ -66,7 +66,6 @@ class TestConfig(DefaultConfig): SECURITY_SCANNER_ENGINE_VERSION_TARGET = 1 SECURITY_SCANNER_API_TIMEOUT_SECONDS = 1 SECURITY_SCANNER_V4_ENDPOINT = "http://fakesecurityscanner/" - SECURITY_SCANNER_V4_NAMESPACE_WHITELIST = ["devtable"] FEATURE_SIGNING = True diff --git a/util/config/configutil.py b/util/config/configutil.py index e9aa44813..0e2e50a0f 100644 --- a/util/config/configutil.py +++ b/util/config/configutil.py @@ -48,15 +48,6 @@ def add_enterprise_config_defaults(config_obj, current_secret_key): config_obj["REPO_MIRROR_TLS_VERIFY"] = config_obj.get("REPO_MIRROR_TLS_VERIFY", True) config_obj["REPO_MIRROR_SERVER_HOSTNAME"] = config_obj.get("REPO_MIRROR_SERVER_HOSTNAME", None) - # Default the signer config. - config_obj["GPG2_PRIVATE_KEY_FILENAME"] = config_obj.get( - "GPG2_PRIVATE_KEY_FILENAME", "signing-private.gpg" - ) - config_obj["GPG2_PUBLIC_KEY_FILENAME"] = config_obj.get( - "GPG2_PUBLIC_KEY_FILENAME", "signing-public.gpg" - ) - config_obj["SIGNING_ENGINE"] = config_obj.get("SIGNING_ENGINE", "gpg2") - # Default security scanner config. config_obj["FEATURE_SECURITY_NOTIFICATIONS"] = config_obj.get( "FEATURE_SECURITY_NOTIFICATIONS", True diff --git a/util/config/schema.py b/util/config/schema.py index 4e543da2f..cf35f0c12 100644 --- a/util/config/schema.py +++ b/util/config/schema.py @@ -18,6 +18,7 @@ INTERNAL_ONLY_PROPERTIES = { "FEATURE_REPOSITORY_ACTION_COUNTER", "APP_REGISTRY_PACKAGE_LIST_CACHE_WHITELIST", "APP_REGISTRY_SHOW_PACKAGE_CACHE_WHITELIST", + "FEATURE_MANIFEST_SIZE_BACKFILL", "TESTING", "SEND_FILE_MAX_AGE_DEFAULT", "DISABLED_FOR_AUDIT_LOGS", @@ -29,7 +30,6 @@ INTERNAL_ONLY_PROPERTIES = { "REPLICATION_QUEUE_NAME", "DOCKERFILE_BUILD_QUEUE_NAME", "CHUNK_CLEANUP_QUEUE_NAME", - "SECSCAN_NOTIFICATION_QUEUE_NAME", "SECURITY_SCANNER_ISSUER_NAME", "NOTIFICATION_QUEUE_NAME", "REPOSITORY_GC_QUEUE_NAME", @@ -57,7 +57,6 @@ INTERNAL_ONLY_PROPERTIES = { "JWTPROXY_AUDIENCE", "JWTPROXY_SIGNER", "SECURITY_SCANNER_INDEXING_MIN_ID", - "SECURITY_SCANNER_V4_NAMESPACE_WHITELIST", "SECURITY_SCANNER_V4_REINDEX_THRESHOLD", "STATIC_SITE_BUCKET", "LABEL_KEY_RESERVED_PREFIXES", diff --git a/util/config/validator.py b/util/config/validator.py index 69a0f6339..6e7c88c0d 100644 --- a/util/config/validator.py +++ b/util/config/validator.py @@ -12,7 +12,6 @@ from util.config.validators.validate_ldap import LDAPValidator from util.config.validators.validate_keystone import KeystoneValidator from util.config.validators.validate_jwt import JWTAuthValidator from util.config.validators.validate_secscan import SecurityScannerValidator -from util.config.validators.validate_signer import SignerValidator from util.config.validators.validate_ssl import SSLValidator, SSL_FILENAMES from util.config.validators.validate_google_login import GoogleLoginValidator from util.config.validators.validate_bitbucket_trigger import BitbucketTriggerValidator @@ -62,7 +61,6 @@ VALIDATORS = { LDAPValidator.name: LDAPValidator.validate, 
JWTAuthValidator.name: JWTAuthValidator.validate, KeystoneValidator.name: KeystoneValidator.validate, - SignerValidator.name: SignerValidator.validate, SecurityScannerValidator.name: SecurityScannerValidator.validate, OIDCLoginValidator.name: OIDCLoginValidator.validate, TimeMachineValidator.name: TimeMachineValidator.validate, diff --git a/util/config/validators/test/test_validate_signer.py b/util/config/validators/test/test_validate_signer.py deleted file mode 100644 index d7ac8bccb..000000000 --- a/util/config/validators/test/test_validate_signer.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from util.config.validator import ValidatorContext -from util.config.validators import ConfigValidationException -from util.config.validators.validate_signer import SignerValidator - -from test.fixtures import * - - -@pytest.mark.parametrize( - "unvalidated_config,expected", - [ - ({}, None), - ({"SIGNING_ENGINE": "foobar"}, ConfigValidationException), - ({"SIGNING_ENGINE": "gpg2"}, Exception), - ], -) -def test_validate_signer(unvalidated_config, expected, app): - validator = SignerValidator() - if expected is not None: - with pytest.raises(expected): - validator.validate(ValidatorContext(unvalidated_config)) - else: - validator.validate(ValidatorContext(unvalidated_config)) diff --git a/util/config/validators/validate_signer.py b/util/config/validators/validate_signer.py deleted file mode 100644 index 4b31fc58f..000000000 --- a/util/config/validators/validate_signer.py +++ /dev/null @@ -1,25 +0,0 @@ -from io import StringIO - -from util.config.validators import BaseValidator, ConfigValidationException -from util.security.signing import SIGNING_ENGINES - - -class SignerValidator(BaseValidator): - name = "signer" - - @classmethod - def validate(cls, validator_context): - """ - Validates the GPG public+private key pair used for signing converted ACIs. - """ - config = validator_context.config - config_provider = validator_context.config_provider - - if config.get("SIGNING_ENGINE") is None: - return - - if config["SIGNING_ENGINE"] not in SIGNING_ENGINES: - raise ConfigValidationException("Unknown signing engine: %s" % config["SIGNING_ENGINE"]) - - engine = SIGNING_ENGINES[config["SIGNING_ENGINE"]](config, config_provider) - engine.detached_sign(BytesIO(b"test string")) diff --git a/util/registry/aufs.py b/util/registry/aufs.py deleted file mode 100644 index c40158dde..000000000 --- a/util/registry/aufs.py +++ /dev/null @@ -1,38 +0,0 @@ -import os - -AUFS_METADATA = ".wh..wh." -AUFS_WHITEOUT = ".wh." -AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT) - - -def is_aufs_metadata(absolute): - """ - Returns whether the given absolute references an AUFS metadata file. - """ - filename = os.path.basename(absolute) - return filename.startswith(AUFS_METADATA) or absolute.startswith(AUFS_METADATA) - - -def get_deleted_filename(absolute): - """ - Returns the name of the deleted file referenced by the AUFS whiteout file at the given path or - None if the file path does not reference a whiteout file. - """ - filename = os.path.basename(absolute) - if not filename.startswith(AUFS_WHITEOUT): - return None - - return filename[AUFS_WHITEOUT_PREFIX_LENGTH:] - - -def get_deleted_prefix(absolute): - """ - Returns the path prefix of the deleted file referenced by the AUFS whiteout file at the given - path or None if the file path does not reference a whiteout file. 
- """ - deleted_filename = get_deleted_filename(absolute) - if deleted_filename is None: - return None - - dirname = os.path.dirname(absolute) - return os.path.join("/", dirname, deleted_filename)[1:] diff --git a/util/registry/gzipwrap.py b/util/registry/gzipwrap.py deleted file mode 100644 index 06c00ca88..000000000 --- a/util/registry/gzipwrap.py +++ /dev/null @@ -1,62 +0,0 @@ -from gzip import GzipFile - -# 256K buffer to Gzip -GZIP_BUFFER_SIZE = 1024 * 256 - - -class GzipWrap(object): - def __init__(self, input, filename=None, compresslevel=1): - self.input = iter(input) - self.buffer = b"" - self.zipper = GzipFile( - filename, mode="wb", fileobj=self, compresslevel=compresslevel, mtime=0 - ) - self.is_done = False - - def read(self, size=-1): - if size is None or size < 0: - raise Exception("Call to GzipWrap with unbound size will result in poor performance") - - # If the buffer already has enough bytes, then simply pop them off of - # the beginning and return them. - if len(self.buffer) >= size or self.is_done: - ret = self.buffer[0:size] - self.buffer = self.buffer[size:] - return ret - - # Otherwise, zip the input until we have enough bytes. - while True: - # Attempt to retrieve the next bytes to write. - is_done = False - - input_size = 0 - input_buffer = b"" - while input_size < GZIP_BUFFER_SIZE: - try: - s = next(self.input) - input_buffer += s - input_size = input_size + len(s) - except StopIteration: - is_done = True - break - - self.zipper.write(input_buffer) - - if is_done: - self.zipper.flush() - self.zipper.close() - self.is_done = True - - if len(self.buffer) >= size or is_done: - ret = self.buffer[0:size] - self.buffer = self.buffer[size:] - return ret - - def flush(self): - pass - - def write(self, data): - self.buffer += data - - def close(self): - self.input.close() diff --git a/util/registry/queuefile.py b/util/registry/queuefile.py deleted file mode 100644 index dac068701..000000000 --- a/util/registry/queuefile.py +++ /dev/null @@ -1,87 +0,0 @@ -from multiprocessing.queues import Empty, Queue - - -class QueueFile(object): - """ - Class which implements a file-like interface and reads QueueResult's from a blocking - multiprocessing queue. - """ - - def __init__(self, queue, name=None, timeout=None): - self._queue = queue - self._closed = False - self._done = False - self._buffer = b"" - self._total_size = 0 - self._name = name - self.raised_exception = False - self._exception_handlers = [] - self._timeout = timeout - - def add_exception_handler(self, handler): - self._exception_handlers.append(handler) - - def read(self, size=-1): - # If the queuefile was closed or we have finished, send back any remaining data. - if self._closed or self._done: - if size == -1: - buf = self._buffer - self._buffer = b"" - return buf - - buf = self._buffer[0:size] - self._buffer = self._buffer[size:] - return buf - - # Loop until we reach the requested data size (or forever if all data was requested). - while (len(self._buffer) < size) or (size == -1): - exception = None - try: - result = self._queue.get(block=True, timeout=self._timeout) - exception = result.exception - except Empty as em: - exception = em - - # Check for any exceptions raised by the queue process. - if exception is not None: - self._closed = True - self.raised_exception = True - - # Fire off the exception to any registered handlers. If no handlers were registered, - # then raise the exception locally. 
- handled = False - for handler in self._exception_handlers: - handler(exception) - handled = True - - if handled: - return b"" - else: - raise exception - - # Check for no further data. If the QueueProcess has finished producing data, then break - # out of the loop to return the data already acquired. - if result.data is None: - self._done = True - break - - # Add the data to the buffer. - self._buffer += result.data - self._total_size += len(result.data) - - # Return the requested slice of the buffer. - if size == -1: - buf = self._buffer - self._buffer = b"" - return buf - - buf = self._buffer[0:size] - self._buffer = self._buffer[size:] - return buf - - def flush(self): - # Purposefully not implemented. - pass - - def close(self): - self._closed = True diff --git a/util/registry/queueprocess.py b/util/registry/queueprocess.py deleted file mode 100644 index eab652459..000000000 --- a/util/registry/queueprocess.py +++ /dev/null @@ -1,81 +0,0 @@ -from multiprocessing import Process, Queue -from collections import namedtuple - -import logging -import multiprocessing -import time -import sys -import traceback - - -logger = multiprocessing.log_to_stderr() -logger.setLevel(logging.INFO) - - -class QueueProcess(object): - """ - Helper class which invokes a worker in a process to produce data for one (or more) queues. - """ - - def __init__(self, get_producer, chunk_size, max_size, args, finished=None): - self._get_producer = get_producer - self._queues = [] - self._chunk_size = chunk_size - self._max_size = max_size - self._args = args or [] - self._finished = finished - - def create_queue(self): - """ - Adds a multiprocessing queue to the list of queues. - - Any queues added will have the data produced appended. - """ - queue = Queue(self._max_size // self._chunk_size) - self._queues.append(queue) - return queue - - @staticmethod - def run_process(target, args, finished=None): - def _target(tar, arg, fin): - try: - tar(*args) - finally: - if fin: - fin() - - Process(target=_target, args=(target, args, finished)).start() - - def run(self): - # Important! gipc is used here because normal multiprocessing does not work - # correctly with gevent when we sleep. - args = (self._get_producer, self._queues, self._chunk_size, self._args) - QueueProcess.run_process(_run, args, finished=self._finished) - - -QueueResult = namedtuple("QueueResult", ["data", "exception"]) - - -def _run(get_producer, queues, chunk_size, args): - producer = get_producer(*args) - while True: - try: - result = QueueResult(producer(chunk_size) or None, None) - except Exception as ex: - message = "%s\n%s" % (str(ex), "".join(traceback.format_exception(*sys.exc_info()))) - result = QueueResult(None, Exception(message)) - - for queue in queues: - try: - queue.put(result, block=True) - except Exception as ex: - logger.exception("Exception writing to queue.") - return - - # Terminate the producer loop if the data produced is empty or an exception occurred. - if result.data is None or result.exception is not None: - break - - # Important! This allows the thread that writes the queue data to the pipe - # to do so. Otherwise, this hangs. 
- time.sleep(0) diff --git a/util/registry/streamlayerformat.py b/util/registry/streamlayerformat.py deleted file mode 100644 index 39c05ebfb..000000000 --- a/util/registry/streamlayerformat.py +++ /dev/null @@ -1,76 +0,0 @@ -import os -import tarfile - -import marisa_trie - -from util.registry.aufs import is_aufs_metadata, get_deleted_prefix -from util.registry.tarlayerformat import TarLayerFormat - - -class StreamLayerMerger(TarLayerFormat): - """ - Class which creates a generator of the combined TAR data for a set of Docker layers. - """ - - def __init__(self, get_tar_stream_iterator, path_prefix=None, reporter=None): - super(StreamLayerMerger, self).__init__( - get_tar_stream_iterator, path_prefix, reporter=reporter - ) - - self.path_trie = marisa_trie.Trie() - self.path_encountered = set() - - self.deleted_prefix_trie = marisa_trie.Trie() - self.deleted_prefixes_encountered = set() - - def after_tar_layer(self): - # Update the tries. - self.path_trie = marisa_trie.Trie(self.path_encountered) - self.deleted_prefix_trie = marisa_trie.Trie(self.deleted_prefixes_encountered) - - @staticmethod - def _normalize_path(path): - return os.path.relpath(path, "./") - - def _check_deleted(self, absolute): - ubsolute = str(absolute) - for prefix in self.deleted_prefix_trie.iter_prefixes(ubsolute): - if not os.path.relpath(ubsolute, prefix).startswith(".."): - return True - - return False - - def is_skipped_file(self, filename): - absolute = StreamLayerMerger._normalize_path(filename) - - # Skip metadata. - if is_aufs_metadata(absolute): - return True - - # Check if the file is under a deleted path. - if self._check_deleted(absolute): - return True - - # Check if this file has already been encountered somewhere. If so, - # skip it. - ubsolute = str(absolute) - if ubsolute in self.path_trie: - return True - - return False - - def should_append_file(self, filename): - if self.is_skipped_file(filename): - return False - - absolute = StreamLayerMerger._normalize_path(filename) - - # Add any prefix of deleted paths to the prefix list. - deleted_prefix = get_deleted_prefix(absolute) - if deleted_prefix is not None: - self.deleted_prefixes_encountered.add(deleted_prefix) - return False - - # Otherwise, add the path to the encountered list and return it. - self.path_encountered.add(absolute) - return True diff --git a/util/registry/tarlayerformat.py b/util/registry/tarlayerformat.py deleted file mode 100644 index 08d7bd752..000000000 --- a/util/registry/tarlayerformat.py +++ /dev/null @@ -1,202 +0,0 @@ -import os -import tarfile -import copy - -from abc import ABCMeta, abstractmethod -from collections import defaultdict -from six import add_metaclass - -from util.abchelpers import nooper - - -class TarLayerReadException(Exception): - """ - Exception raised when reading a layer has failed. - """ - - pass - - -# 9MB (+ padding below) so that it matches the 10MB expected by Gzip. -CHUNK_SIZE = 1024 * 1024 * 9 - - -@add_metaclass(ABCMeta) -class TarLayerFormatterReporter(object): - @abstractmethod - def report_pass(self, stream_count): - """ - Reports a formatting pass. - """ - pass - - -@nooper -class NoopReporter(TarLayerFormatterReporter): - pass - - -@add_metaclass(ABCMeta) -class TarLayerFormat(object): - """ - Class which creates a generator of the combined TAR data. 
- """ - - def __init__(self, tar_stream_getter_iterator, path_prefix=None, reporter=None): - self.tar_stream_getter_iterator = tar_stream_getter_iterator - self.path_prefix = path_prefix or "" - self.reporter = reporter or NoopReporter() - - def get_generator(self): - for stream_getter in self.tar_stream_getter_iterator(): - current_tar_stream = stream_getter() - - # Read the current TAR. If it is empty, we just continue - # to the next one. - tar_file = TarLayerFormat._tar_file_from_stream(current_tar_stream) - if not tar_file: - continue - - # For each of the tar entries, yield them IF and ONLY IF we have not - # encountered the path before. - dangling_hard_links = defaultdict(list) - try: - for tar_info in tar_file: - if not self.should_append_file(tar_info.name): - continue - - # Note: We use a copy here because we need to make sure we copy over all the internal - # data of the tar header. We cannot use frombuf(tobuf()), however, because it doesn't - # properly handle large filenames. - clone = copy.deepcopy(tar_info) - clone.name = os.path.join(self.path_prefix, clone.name) - - # If the entry is a *hard* link, then prefix it as well. Soft links are relative. - if clone.linkname and clone.type == tarfile.LNKTYPE: - # If the entry is a dangling hard link, we skip here. Dangling hard links will be handled - # in a second pass. - if self.is_skipped_file(tar_info.linkname): - dangling_hard_links[tar_info.linkname].append(tar_info) - continue - - clone.linkname = os.path.join(self.path_prefix, clone.linkname) - - # Yield the tar header. - yield clone.tobuf() - - # Try to extract any file contents for the tar. If found, we yield them as well. - if tar_info.isreg(): - for block in TarLayerFormat._emit_file(tar_file, tar_info): - yield block - except UnicodeDecodeError as ude: - raise TarLayerReadException("Decode error: %s" % ude) - - # Close the layer stream now that we're done with it. - tar_file.close() - - # If there are any dangling hard links, open a new stream and retarget the dangling hard - # links to a new copy of the contents, which will be placed under the *first* dangling hard - # link's name. - if len(dangling_hard_links) > 0: - tar_file = TarLayerFormat._tar_file_from_stream(stream_getter()) - if not tar_file: - raise TarLayerReadException("Could not re-read tar layer") - - for tar_info in tar_file: - # If we encounter a file that holds the data for a dangling link, - # emit it under the name of the first dangling hard link. All other - # dangling hard links will be retargeted to this first name. - if tar_info.name in dangling_hard_links: - first_dangling = dangling_hard_links[tar_info.name][0] - - # Copy the first dangling hard link, change it to a normal file, - # and emit the deleted file's contents for it. - clone = copy.deepcopy(first_dangling) - clone.name = os.path.join(self.path_prefix, first_dangling.name) - clone.type = tar_info.type - clone.size = tar_info.size - clone.pax_headers = tar_info.pax_headers - yield clone.tobuf() - - for block in TarLayerFormat._emit_file(tar_file, tar_info): - yield block - - elif ( - tar_info.type == tarfile.LNKTYPE - and tar_info.linkname in dangling_hard_links - and not self.is_skipped_file(tar_info.name) - ): - # Retarget if necessary. All dangling hard links (but the first) will - # need to be retargeted. - first_dangling = dangling_hard_links[tar_info.linkname][0] - if tar_info.name == first_dangling.name: - # Skip; the first dangling is handled above. - continue - - # Retarget the hard link to the first dangling hard link. 
- clone = copy.deepcopy(tar_info) - clone.name = os.path.join(self.path_prefix, clone.name) - clone.linkname = os.path.join(self.path_prefix, first_dangling.name) - yield clone.tobuf() - - # Close the layer stream now that we're done with it. - tar_file.close() - - # Conduct any post-tar work. - self.after_tar_layer() - self.reporter.report_pass(2 if len(dangling_hard_links) > 0 else 1) - - # Last two records are empty in TAR spec. - yield b"\0" * 512 - yield b"\0" * 512 - - @abstractmethod - def is_skipped_file(self, filename): - """ - Returns true if the file with the given name will be skipped during append. - """ - pass - - @abstractmethod - def should_append_file(self, filename): - """ - Returns true if the file with the given name should be appended when producing the new TAR. - """ - pass - - @abstractmethod - def after_tar_layer(self): - """ - Invoked after a TAR layer is added, to do any post-add work. - """ - pass - - @staticmethod - def _tar_file_from_stream(stream): - tar_file = None - try: - tar_file = tarfile.open(mode="r|*", fileobj=stream) - except tarfile.ReadError as re: - if str(re) != "empty file": - raise TarLayerReadException("Could not read layer") - - return tar_file - - @staticmethod - def _emit_file(tar_file, tar_info): - file_stream = tar_file.extractfile(tar_info) - if file_stream is not None: - length = 0 - while True: - current_block = file_stream.read(CHUNK_SIZE) - if not len(current_block): - break - - yield current_block - length += len(current_block) - - file_stream.close() - - # Files must be padding to 512 byte multiples. - if length % 512 != 0: - yield b"\0" * (512 - (length % 512)) diff --git a/util/registry/test/test_queuefile.py b/util/registry/test/test_queuefile.py deleted file mode 100644 index 0595121ac..000000000 --- a/util/registry/test/test_queuefile.py +++ /dev/null @@ -1,118 +0,0 @@ -import os - -import pytest - -from util.registry.queueprocess import QueueResult -from util.registry.queuefile import QueueFile - - -class FakeQueue(object): - def __init__(self): - self.items = [] - - def get(self, block, timeout=None): - return self.items.pop(0) - - def put(self, data): - self.items.append(data) - - -def test_basic(): - queue = FakeQueue() - queue.put(QueueResult(b"hello world", None)) - queue.put(QueueResult(b"! how goes there?", None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - assert queuefile.read() == b"hello world! how goes there?" - - -def test_chunk_reading(): - queue = FakeQueue() - queue.put(QueueResult(b"hello world", None)) - queue.put(QueueResult(b"! how goes there?", None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - data = b"" - - while True: - result = queuefile.read(size=2) - if not result: - break - - data += result - - assert data == b"hello world! how goes there?" - - -def test_unhandled_exception(): - queue = FakeQueue() - queue.put(QueueResult(b"hello world", None)) - queue.put(QueueResult(None, IOError("some exception"))) - queue.put(QueueResult(b"! how goes there?", None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - - with pytest.raises(IOError): - queuefile.read(size=12) - - -def test_handled_exception(): - queue = FakeQueue() - queue.put(QueueResult(b"hello world", None)) - queue.put(QueueResult(None, IOError("some exception"))) - queue.put(QueueResult(b"! 
how goes there?", None)) - queue.put(QueueResult(None, None)) - - ex_found = [None] - - def handler(ex): - ex_found[0] = ex - - queuefile = QueueFile(queue) - queuefile.add_exception_handler(handler) - queuefile.read(size=12) - - assert ex_found[0] is not None - - -def test_binary_data(): - queue = FakeQueue() - - # Generate some binary data. - binary_data = os.urandom(1024) - queue.put(QueueResult(binary_data, None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - found_data = b"" - while True: - current_data = queuefile.read(size=37) - if len(current_data) == 0: - break - - found_data = found_data + current_data - - assert found_data == binary_data - - -def test_empty_data(): - queue = FakeQueue() - - # Generate some empty binary data. - binary_data = b"\0" * 1024 - queue.put(QueueResult(binary_data, None)) - queue.put(QueueResult(None, None)) - - queuefile = QueueFile(queue) - found_data = b"" - while True: - current_data = queuefile.read(size=37) - if len(current_data) == 0: - break - - found_data = found_data + current_data - - assert found_data == binary_data diff --git a/util/registry/test/test_streamlayerformat.py b/util/registry/test/test_streamlayerformat.py deleted file mode 100644 index d329f3e9b..000000000 --- a/util/registry/test/test_streamlayerformat.py +++ /dev/null @@ -1,469 +0,0 @@ -import tarfile - -import pytest - -from io import BytesIO -from util.registry.streamlayerformat import StreamLayerMerger -from util.registry.aufs import AUFS_WHITEOUT -from util.registry.tarlayerformat import TarLayerReadException - - -def create_layer(*file_pairs): - output = BytesIO() - with tarfile.open(fileobj=output, mode="w:gz") as tar: - for current_filename, current_contents in file_pairs: - if current_contents is None: - # This is a deleted file. 
- if current_filename.endswith("/"): - current_filename = current_filename[:-1] - - parts = current_filename.split("/") - if len(parts) > 1: - current_filename = "/".join(parts[:-1]) + "/" + AUFS_WHITEOUT + parts[-1] - else: - current_filename = AUFS_WHITEOUT + parts[-1] - - current_contents = b"" - - if current_contents.startswith(b"linkto:"): - info = tarfile.TarInfo(name=current_filename) - info.linkname = current_contents[len(b"linkto:") :].decode("utf-8") - info.type = tarfile.LNKTYPE - tar.addfile(info) - else: - info = tarfile.TarInfo(name=current_filename) - info.size = len(current_contents) - tar.addfile(info, fileobj=BytesIO(current_contents)) - - return output.getvalue() - - -def create_empty_layer(): - return b"" - - -def squash_layers(layers, path_prefix=None): - def getter_for_layer(layer): - return lambda: BytesIO(layer) - - def layer_stream_getter(): - return [getter_for_layer(layer) for layer in layers] - - merger = StreamLayerMerger(layer_stream_getter, path_prefix=path_prefix) - merged_data = b"".join(list(merger.get_generator())) - return merged_data - - -def assertHasFile(squashed, filename, contents): - with tarfile.open(fileobj=BytesIO(squashed), mode="r:*") as tar: - member = tar.getmember(filename) - assert contents == b"\n".join(tar.extractfile(member).readlines()) - - -def assertDoesNotHaveFile(squashed, filename): - with tarfile.open(fileobj=BytesIO(squashed), mode="r:*") as tar: - try: - member = tar.getmember(filename) - except Exception as ex: - return - - assert False, "Filename %s found" % filename - - -def test_single_layer(): - tar_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - squashed = squash_layers([tar_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "another_file", b"bar") - assertHasFile(squashed, "third_file", b"meh") - - -def test_multiple_layers(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("top_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "another_file", b"bar") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "top_file", b"top") - - -def test_multiple_layers_dot(): - second_layer = create_layer( - ("./some_file", b"foo"), ("another_file", b"bar"), ("./third_file", b"meh") - ) - - first_layer = create_layer(("top_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "./some_file", b"foo") - assertHasFile(squashed, "another_file", b"bar") - assertHasFile(squashed, "./third_file", b"meh") - assertHasFile(squashed, "top_file", b"top") - - -def test_multiple_layers_overwrite(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "another_file", b"top") - - -def test_multiple_layers_overwrite_base_dot(): - second_layer = create_layer( - ("some_file", b"foo"), ("./another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, 
"third_file", b"meh") - assertHasFile(squashed, "another_file", b"top") - assertDoesNotHaveFile(squashed, "./another_file") - - -def test_multiple_layers_overwrite_top_dot(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("./another_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "./another_file", b"top") - assertDoesNotHaveFile(squashed, "another_file") - - -def test_deleted_file(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertDoesNotHaveFile(squashed, "another_file") - - -def test_deleted_readded_file(): - third_layer = create_layer(("another_file", b"bar")) - - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", None), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", b"newagain")) - - squashed = squash_layers([first_layer, second_layer, third_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "another_file", b"newagain") - - -def test_deleted_in_lower_layer(): - third_layer = create_layer(("deleted_file", b"bar")) - - second_layer = create_layer( - ("some_file", b"foo"), ("deleted_file", None), ("third_file", b"meh") - ) - - first_layer = create_layer(("top_file", b"top")) - - squashed = squash_layers([first_layer, second_layer, third_layer]) - - assertHasFile(squashed, "some_file", b"foo") - assertHasFile(squashed, "third_file", b"meh") - assertHasFile(squashed, "top_file", b"top") - assertDoesNotHaveFile(squashed, "deleted_file") - - -def test_deleted_in_lower_layer_with_added_dot(): - third_layer = create_layer(("./deleted_file", b"something")) - - second_layer = create_layer(("deleted_file", None)) - - squashed = squash_layers([second_layer, third_layer]) - assertDoesNotHaveFile(squashed, "deleted_file") - - -def test_deleted_in_lower_layer_with_deleted_dot(): - third_layer = create_layer(("./deleted_file", b"something")) - - second_layer = create_layer(("./deleted_file", None)) - - squashed = squash_layers([second_layer, third_layer]) - assertDoesNotHaveFile(squashed, "deleted_file") - - -def test_directory(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/another_file", b"bar")) - - first_layer = create_layer(("foo/some_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "foo/some_file", b"top") - assertHasFile(squashed, "foo/another_file", b"bar") - - -def test_sub_directory(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/bar/another_file", b"bar")) - - first_layer = create_layer(("foo/some_file", b"top")) - - squashed = squash_layers([first_layer, second_layer]) - - assertHasFile(squashed, "foo/some_file", b"top") - assertHasFile(squashed, "foo/bar/another_file", b"bar") - - -def test_delete_directory(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/another_file", b"bar")) - - first_layer = create_layer(("foo/", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertDoesNotHaveFile(squashed, "foo/some_file") - 
assertDoesNotHaveFile(squashed, "foo/another_file") - - -def test_delete_sub_directory(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/bar/another_file", b"bar")) - - first_layer = create_layer(("foo/bar/", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertDoesNotHaveFile(squashed, "foo/bar/another_file") - assertHasFile(squashed, "foo/some_file", b"foo") - - -def test_delete_sub_directory_with_dot(): - second_layer = create_layer(("foo/some_file", b"foo"), ("foo/bar/another_file", b"bar")) - - first_layer = create_layer(("./foo/bar/", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertDoesNotHaveFile(squashed, "foo/bar/another_file") - assertHasFile(squashed, "foo/some_file", b"foo") - - -def test_delete_sub_directory_with_subdot(): - second_layer = create_layer(("./foo/some_file", b"foo"), ("./foo/bar/another_file", b"bar")) - - first_layer = create_layer(("foo/bar/", None)) - - squashed = squash_layers([first_layer, second_layer]) - - assertDoesNotHaveFile(squashed, "foo/bar/another_file") - assertDoesNotHaveFile(squashed, "./foo/bar/another_file") - assertHasFile(squashed, "./foo/some_file", b"foo") - - -def test_delete_directory_recreate(): - third_layer = create_layer(("foo/some_file", b"foo"), ("foo/another_file", b"bar")) - - second_layer = create_layer(("foo/", None)) - - first_layer = create_layer(("foo/some_file", b"baz")) - - squashed = squash_layers([first_layer, second_layer, third_layer]) - - assertHasFile(squashed, "foo/some_file", b"baz") - assertDoesNotHaveFile(squashed, "foo/another_file") - - -def test_delete_directory_prefix(): - third_layer = create_layer(("foobar/some_file", b"foo"), ("foo/another_file", b"bar")) - - second_layer = create_layer(("foo/", None)) - - squashed = squash_layers([second_layer, third_layer]) - - assertHasFile(squashed, "foobar/some_file", b"foo") - assertDoesNotHaveFile(squashed, "foo/another_file") - - -def test_delete_directory_pre_prefix(): - third_layer = create_layer(("foobar/baz/some_file", b"foo"), ("foo/another_file", b"bar")) - - second_layer = create_layer(("foo/", None)) - - squashed = squash_layers([second_layer, third_layer]) - - assertHasFile(squashed, "foobar/baz/some_file", b"foo") - assertDoesNotHaveFile(squashed, "foo/another_file") - - -def test_delete_root_directory(): - third_layer = create_layer(("build/first_file", b"foo"), ("build/second_file", b"bar")) - - second_layer = create_layer(("build", None)) - - squashed = squash_layers([second_layer, third_layer]) - - assertDoesNotHaveFile(squashed, "build/first_file") - assertDoesNotHaveFile(squashed, "build/second_file") - - -def test_tar_empty_layer(): - third_layer = create_layer(("build/first_file", b"foo"), ("build/second_file", b"bar")) - - empty_layer = create_layer() - - squashed = squash_layers([empty_layer, third_layer]) - - assertHasFile(squashed, "build/first_file", b"foo") - assertHasFile(squashed, "build/second_file", b"bar") - - -def test_data_empty_layer(): - third_layer = create_layer(("build/first_file", b"foo"), ("build/second_file", b"bar")) - - empty_layer = create_empty_layer() - - squashed = squash_layers([empty_layer, third_layer]) - - assertHasFile(squashed, "build/first_file", b"foo") - assertHasFile(squashed, "build/second_file", b"bar") - - -def test_broken_layer(): - third_layer = create_layer(("build/first_file", b"foo"), ("build/second_file", b"bar")) - - broken_layer = b"not valid data" - - with pytest.raises(TarLayerReadException): - squash_layers([broken_layer, 
third_layer]) - - -def test_single_layer_with_prefix(): - tar_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - squashed = squash_layers([tar_layer], path_prefix="foo/") - - assertHasFile(squashed, "foo/some_file", b"foo") - assertHasFile(squashed, "foo/another_file", b"bar") - assertHasFile(squashed, "foo/third_file", b"meh") - - -def test_multiple_layers_overwrite_with_prefix(): - second_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - first_layer = create_layer(("another_file", b"top")) - - squashed = squash_layers([first_layer, second_layer], path_prefix="foo/") - - assertHasFile(squashed, "foo/some_file", b"foo") - assertHasFile(squashed, "foo/third_file", b"meh") - assertHasFile(squashed, "foo/another_file", b"top") - - -def test_superlong_filename(): - tar_layer = create_layer( - ( - "this_is_the_filename_that_never_ends_it_goes_on_and_on_my_friend_some_people_started", - b"meh", - ) - ) - - squashed = squash_layers([tar_layer], path_prefix="foo/") - assertHasFile( - squashed, - "foo/this_is_the_filename_that_never_ends_it_goes_on_and_on_my_friend_some_people_started", - b"meh", - ) - - -def test_superlong_prefix(): - tar_layer = create_layer( - ("some_file", b"foo"), ("another_file", b"bar"), ("third_file", b"meh") - ) - - squashed = squash_layers( - [tar_layer], - path_prefix="foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/", - ) - - assertHasFile( - squashed, - "foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/some_file", - b"foo", - ) - assertHasFile( - squashed, - "foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/another_file", - b"bar", - ) - assertHasFile( - squashed, - "foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/third_file", - b"meh", - ) - - -def test_hardlink_to_deleted_file(): - first_layer = create_layer( - ("tobedeletedfile", b"somecontents"), - ("link_to_deleted_file", b"linkto:tobedeletedfile"), - ("third_file", b"meh"), - ) - - second_layer = create_layer(("tobedeletedfile", None)) - - squashed = squash_layers([second_layer, first_layer], path_prefix="foo/") - - assertHasFile(squashed, "foo/third_file", b"meh") - assertHasFile(squashed, "foo/link_to_deleted_file", b"somecontents") - assertDoesNotHaveFile(squashed, "foo/tobedeletedfile") - - -def test_multiple_hardlink_to_deleted_file(): - first_layer = create_layer( - ("tobedeletedfile", b"somecontents"), - ("link_to_deleted_file", b"linkto:tobedeletedfile"), - ("another_link_to_deleted_file", b"linkto:tobedeletedfile"), - ("third_file", b"meh"), - ) - - second_layer = create_layer(("tobedeletedfile", None)) - - squashed = squash_layers([second_layer, first_layer], path_prefix="foo/") - - assertHasFile(squashed, "foo/third_file", b"meh") - assertHasFile(squashed, "foo/link_to_deleted_file", b"somecontents") - assertHasFile(squashed, "foo/another_link_to_deleted_file", b"somecontents") - - assertDoesNotHaveFile(squashed, "foo/tobedeletedfile") diff --git a/util/repomirror/api.py b/util/repomirror/api.py index f76d3148b..2037026a7 100644 --- a/util/repomirror/api.py +++ b/util/repomirror/api.py @@ -96,7 +96,7 @@ class RepoMirrorAPIInterface(object): Posts the given repository to the repo mirror for processing, blocking until complete. 
Returns the analysis version on success or raises an exception deriving from - AnalyzeLayerException on failure. Callers should handle all cases of AnalyzeLayerException. + RepoMirrorException on failure. Callers should handle all cases of RepoMirrorException. """ pass diff --git a/util/secscan/analyzer.py b/util/secscan/analyzer.py deleted file mode 100644 index 91dc645a2..000000000 --- a/util/secscan/analyzer.py +++ /dev/null @@ -1,239 +0,0 @@ -import logging -import logging.config - -from collections import defaultdict - -import features - -from data.database import ExternalNotificationEvent, IMAGE_NOT_SCANNED_ENGINE_VERSION, Image -from data.model.oci.tag import filter_tags_have_repository_event, get_tags_for_legacy_image -from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base -from notifications import spawn_notification -from util.secscan import PRIORITY_LEVELS -from util.secscan.api import ( - APIRequestFailure, - AnalyzeLayerException, - MissingParentLayerException, - InvalidLayerException, - AnalyzeLayerRetryException, -) -from util.morecollections import AttrDict - - -logger = logging.getLogger(__name__) - - -class PreemptedException(Exception): - """ - Exception raised if another worker analyzed the image before this worker was able to do so. - """ - - -class LayerAnalyzer(object): - """ - Helper class to perform analysis of a layer via the security scanner. - """ - - def __init__(self, config, api): - self._api = api - self._target_version = config.get("SECURITY_SCANNER_ENGINE_VERSION_TARGET", 2) - - def analyze_recursively(self, layer): - """ - Analyzes a layer and all its parents. - - Raises a PreemptedException if the analysis was preempted by another worker. - """ - try: - self._analyze_recursively_and_check(layer) - except MissingParentLayerException: - # The parent layer of this layer was missing. Force a reanalyze. - try: - self._analyze_recursively_and_check(layer, force_parents=True) - except MissingParentLayerException: - # Parent is still missing... mark the layer as invalid. - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - - def _analyze_recursively_and_check(self, layer, force_parents=False): - """ - Analyzes a layer and all its parents, optionally forcing parents to be reanalyzed, and - checking for various exceptions that can occur during analysis. - """ - try: - self._analyze_recursively(layer, force_parents=force_parents) - except InvalidLayerException: - # One of the parent layers is invalid, so this layer is invalid as well. - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - except AnalyzeLayerRetryException: - # Something went wrong when trying to analyze the layer, but we should retry, so leave - # the layer unindexed. Another worker will come along and handle it. - raise APIRequestFailure - except MissingParentLayerException: - # Pass upward, as missing parent is handled in the analyze_recursively method. - raise - except AnalyzeLayerException: - # Something went wrong when trying to analyze the layer and we cannot retry, so mark the - # layer as invalid. - logger.exception( - "Got exception when trying to analyze layer %s via security scanner", layer.id - ) - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - - def _analyze_recursively(self, layer, force_parents=False): - # Check if there is a parent layer that needs to be analyzed. 
- if layer.parent_id and ( - force_parents or layer.parent.security_indexed_engine < self._target_version - ): - try: - base_query = get_image_with_storage_and_parent_base() - parent_layer = base_query.where(Image.id == layer.parent_id).get() - except Image.DoesNotExist: - logger.warning( - "Image %s has Image %s as parent but doesn't exist.", layer.id, layer.parent_id - ) - raise AnalyzeLayerException("Parent image not found") - - self._analyze_recursively(parent_layer, force_parents=force_parents) - - # Analyze the layer itself. - self._analyze(layer, force_parents=force_parents) - - def _analyze(self, layer, force_parents=False): - """ - Analyzes a single layer. - - Return a tuple of two bools: - - The first one tells us if we should evaluate its children. - - The second one is set to False when another worker pre-empted the candidate's analysis - for us. - """ - # If the parent couldn't be analyzed with the target version or higher, we can't analyze - # this image. Mark it as failed with the current target version. - if not force_parents and ( - layer.parent_id - and not layer.parent.security_indexed - and layer.parent.security_indexed_engine >= self._target_version - ): - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - - # Nothing more to do. - return - - # Make sure the image's storage is not marked as uploading. If so, nothing more to do. - if layer.storage.uploading: - if not set_secscan_status(layer, False, self._target_version): - raise PreemptedException - - # Nothing more to do. - return - - # Analyze the image. - previously_security_indexed_successfully = layer.security_indexed - previous_security_indexed_engine = layer.security_indexed_engine - - logger.debug("Analyzing layer %s", layer.docker_image_id) - analyzed_version = self._api.analyze_layer(layer) - - logger.debug( - "Analyzed layer %s successfully with version %s", - layer.docker_image_id, - analyzed_version, - ) - - # Mark the image as analyzed. - if not set_secscan_status(layer, True, analyzed_version): - # If the image was previously successfully marked as resolved, then set_secscan_status - # might return False because we're not changing it (since this is a fixup). - if not previously_security_indexed_successfully: - raise PreemptedException - - # If we are the one who've done the job successfully first, then we need to decide if we should - # send notifications. Notifications are sent if: - # 1) This is a new layer - # 2) This is an existing layer that previously did not index properly - # We don't always send notifications as if we are re-indexing a successful layer for a newer - # feature set in the security scanner, notifications will be spammy. - is_new_image = previous_security_indexed_engine == IMAGE_NOT_SCANNED_ENGINE_VERSION - is_existing_image_unindexed = ( - not is_new_image and not previously_security_indexed_successfully - ) - if features.SECURITY_NOTIFICATIONS and (is_new_image or is_existing_image_unindexed): - # Get the tags of the layer we analyzed. - repository_map = defaultdict(list) - event = ExternalNotificationEvent.get(name="vulnerability_found") - - # NOTE: This should really use the registry_model, but as this whole analyzer is - # now deprecated, we'll keep calling into the model directly for the time being. 
- matching = list( - filter_tags_have_repository_event(get_tags_for_legacy_image(layer.id), event) - ) - - for tag in matching: - repository_map[tag.repository_id].append(tag) - - # If there is at least one tag, - # Lookup the vulnerabilities for the image, now that it is analyzed. - if len(repository_map) > 0: - logger.debug("Loading data for layer %s", layer.id) - try: - layer_data = self._api.get_layer_data(layer, include_vulnerabilities=True) - except APIRequestFailure: - raise - - if layer_data is not None: - # Dispatch events for any detected vulnerabilities - logger.debug("Got data for layer %s: %s", layer.id, layer_data) - found_features = layer_data["Layer"].get("Features", []) - for repository_id in repository_map: - tags = repository_map[repository_id] - vulnerabilities = dict() - - # Collect all the vulnerabilities found for the layer under each repository and send - # as a batch notification. - for feature in found_features: - if "Vulnerabilities" not in feature: - continue - - for vulnerability in feature.get("Vulnerabilities", []): - vuln_data = { - "id": vulnerability["Name"], - "description": vulnerability.get("Description", None), - "link": vulnerability.get("Link", None), - "has_fix": "FixedBy" in vulnerability, - # TODO: Change this key name if/when we change the event format. - "priority": vulnerability.get("Severity", "Unknown"), - } - - vulnerabilities[vulnerability["Name"]] = vuln_data - - # TODO: remove when more endpoints have been converted to using - # interfaces - repository = AttrDict( - { - "namespace_name": tags[0].repository.namespace_user.username, - "name": tags[0].repository.name, - } - ) - - repo_vulnerabilities = list(vulnerabilities.values()) - if not repo_vulnerabilities: - continue - - priority_key = lambda v: PRIORITY_LEVELS.get(v["priority"], {}).get( - "index", 100 - ) - repo_vulnerabilities.sort(key=priority_key) - - event_data = { - "tags": [tag.name for tag in tags], - "vulnerabilities": repo_vulnerabilities, - "vulnerability": repo_vulnerabilities[ - 0 - ], # For back-compat with existing events. - } - - spawn_notification(repository, "vulnerability_found", event_data) diff --git a/util/secscan/api.py b/util/secscan/api.py index a860cfaea..e5d6a22f0 100644 --- a/util/secscan/api.py +++ b/util/secscan/api.py @@ -8,14 +8,11 @@ from urllib.parse import urljoin import requests from data import model -from data.database import CloseForLongOperation, TagManifest, Image, Manifest, ManifestLegacyImage -from data.model.storage import get_storage_locations -from data.model.image import get_image_with_storage +from data.database import CloseForLongOperation, Image, Manifest, ManifestLegacyImage from data.registry_model.datatypes import Manifest as ManifestDataType, LegacyImage from util.abchelpers import nooper from util.failover import failover, FailoverException from util.secscan.validator import V2SecurityConfigValidator -from util.security.registry_jwt import generate_bearer_token, build_context_and_subject from _init import CONF_DIR @@ -30,31 +27,6 @@ DEFAULT_HTTP_HEADERS = {"Connection": "close"} logger = logging.getLogger(__name__) -class AnalyzeLayerException(Exception): - """ - Exception raised when a layer fails to analyze due to a request issue. - """ - - -class AnalyzeLayerRetryException(Exception): - """ - Exception raised when a layer fails to analyze due to a request issue, and the request should be - retried. 
- """ - - -class MissingParentLayerException(AnalyzeLayerException): - """ - Exception raised when the parent of the layer is missing from the security scanner. - """ - - -class InvalidLayerException(AnalyzeLayerException): - """ - Exception raised when the layer itself cannot be handled by the security scanner. - """ - - class APIRequestFailure(Exception): """ Exception raised when there is a failure to conduct an API request. @@ -71,11 +43,7 @@ class Non200ResponseException(Exception): self.response = response -_API_METHOD_INSERT = "layers" _API_METHOD_GET_LAYER = "layers/%s" -_API_METHOD_DELETE_LAYER = "layers/%s" -_API_METHOD_MARK_NOTIFICATION_READ = "notifications/%s" -_API_METHOD_GET_NOTIFICATION = "notifications/%s" _API_METHOD_PING = "metrics" @@ -83,18 +51,13 @@ def compute_layer_id(layer): """ Returns the ID for the layer in the security scanner. """ - # NOTE: this is temporary until we switch to Clair V3. - if isinstance(layer, ManifestDataType): - if layer._is_tag_manifest: - layer = TagManifest.get(id=layer._db_id).tag.image - else: - manifest = Manifest.get(id=layer._db_id) - try: - layer = ManifestLegacyImage.get(manifest=manifest).image - except ManifestLegacyImage.DoesNotExist: - return None - elif isinstance(layer, LegacyImage): - layer = Image.get(id=layer._db_id) + assert isinstance(layer, ManifestDataType) + + manifest = Manifest.get(id=layer._db_id) + try: + layer = ManifestLegacyImage.get(manifest=manifest).image + except ManifestLegacyImage.DoesNotExist: + return None assert layer.docker_image_id assert layer.storage.uuid @@ -147,14 +110,6 @@ class SecurityScannerAPIInterface(object): Helper class for talking to the Security Scan service (usually Clair). """ - @abstractmethod - def cleanup_layers(self, layers): - """ - Callback invoked by garbage collection to cleanup any layers that no longer need to be - stored in the security scanner. - """ - pass - @abstractmethod def ping(self): """ @@ -165,23 +120,6 @@ class SecurityScannerAPIInterface(object): """ pass - @abstractmethod - def delete_layer(self, layer): - """ - Calls DELETE on the given layer in the security scanner, removing it from its database. - """ - pass - - @abstractmethod - def analyze_layer(self, layer): - """ - Posts the given layer to the security scanner for analysis, blocking until complete. - - Returns the analysis version on success or raises an exception deriving from - AnalyzeLayerException on failure. Callers should handle all cases of AnalyzeLayerException. - """ - pass - @abstractmethod def check_layer_vulnerable(self, layer_id, cve_name): """ @@ -189,22 +127,6 @@ class SecurityScannerAPIInterface(object): """ pass - @abstractmethod - def get_notification(self, notification_name, layer_limit=100, page=None): - """ - Gets the data for a specific notification, with optional page token. - - Returns a tuple of the data (None on failure) and whether to retry. - """ - pass - - @abstractmethod - def mark_notification_read(self, notification_name): - """ - Marks a security scanner notification as read. - """ - pass - @abstractmethod def get_layer_data(self, layer, include_features=False, include_vulnerabilities=False): """ @@ -242,94 +164,6 @@ class ImplementedSecurityScannerAPI(SecurityScannerAPIInterface): self._target_version = config.get("SECURITY_SCANNER_ENGINE_VERSION_TARGET", 2) self._uri_creator = uri_creator - def _get_image_url_and_auth(self, image): - """ - Returns a tuple of the url and the auth header value that must be used to fetch the layer - data itself. 
- - If the image can't be addressed, we return None. - """ - if self._instance_keys is None: - raise Exception("No Instance keys provided to Security Scanner API") - - path = model.storage.get_layer_path(image.storage) - locations = self._default_storage_locations - - if not self._storage.exists(locations, path): - locations = get_storage_locations(image.storage.uuid) - if not locations or not self._storage.exists(locations, path): - logger.warning( - "Could not find a valid location to download layer %s out of %s", - compute_layer_id(image), - locations, - ) - return None, None - - uri = self._storage.get_direct_download_url(locations, path) - auth_header = None - if uri is None: - # Use the registry API instead, with a signed JWT giving access - repo_name = image.repository.name - namespace_name = image.repository.namespace_user.username - repository_and_namespace = "/".join([namespace_name, repo_name]) - - # Generate the JWT which will authorize this - audience = self._server_hostname - context, subject = build_context_and_subject() - access = [ - {"type": "repository", "name": repository_and_namespace, "actions": ["pull"],} - ] - - auth_token = generate_bearer_token( - audience, subject, context, access, TOKEN_VALIDITY_LIFETIME_S, self._instance_keys - ) - auth_header = "Bearer " + auth_token.decode("ascii") - - uri = self._uri_creator(repository_and_namespace, image.storage.content_checksum) - - return uri, auth_header - - def _new_analyze_request(self, layer): - """ - Create the request body to submit the given layer for analysis. - - If the layer's URL cannot be found, returns None. - """ - layer_id = compute_layer_id(layer) - if layer_id is None: - return None - - url, auth_header = self._get_image_url_and_auth(layer) - if url is None: - return None - - layer_request = { - "Name": layer_id, - "Path": url, - "Format": "Docker", - } - - if auth_header is not None: - layer_request["Headers"] = { - "Authorization": auth_header, - } - - if layer.parent is not None: - if layer.parent.docker_image_id and layer.parent.storage.uuid: - layer_request["ParentName"] = compute_layer_id(layer.parent) - - return { - "Layer": layer_request, - } - - def cleanup_layers(self, layers): - """ - Callback invoked by garbage collection to cleanup any layers that no longer need to be - stored in the security scanner. - """ - for layer in layers: - self.delete_layer(layer) - def ping(self): """ Calls GET on the metrics endpoint of the security scanner to ensure it is running and @@ -355,95 +189,6 @@ class ImplementedSecurityScannerAPI(SecurityScannerAPIInterface): msg = "Exception when trying to connect to security scanner endpoint: %s" % ve raise Exception(msg) - def delete_layer(self, layer): - """ - Calls DELETE on the given layer in the security scanner, removing it from its database. - """ - layer_id = compute_layer_id(layer) - if layer_id is None: - return None - - # NOTE: We are adding an extra check here for the time being just to be sure we're - # not hitting any overlap. 
- docker_image_id, layer_storage_uuid = layer_id.split(".") - if get_image_with_storage(docker_image_id, layer_storage_uuid): - logger.warning("Found shared Docker ID and storage for layer %s", layer_id) - return False - - try: - self._call("DELETE", _API_METHOD_DELETE_LAYER % layer_id) - return True - except Non200ResponseException: - return False - except requests.exceptions.RequestException: - logger.exception("Failed to delete layer: %s", layer_id) - return False - - def analyze_layer(self, layer): - """ - Posts the given layer to the security scanner for analysis, blocking until complete. - - Returns the analysis version on success or raises an exception deriving from - AnalyzeLayerException on failure. Callers should handle all cases of AnalyzeLayerException. - """ - - def _response_json(request, response): - try: - return response.json() - except ValueError: - logger.exception( - "Failed to decode JSON when analyzing layer %s", request["Layer"]["Name"] - ) - raise AnalyzeLayerException - - request = self._new_analyze_request(layer) - if not request: - logger.error("Could not build analyze request for layer %s", layer.id) - raise AnalyzeLayerException - - logger.debug("Analyzing layer %s", request["Layer"]["Name"]) - try: - response = self._call("POST", _API_METHOD_INSERT, body=request) - except requests.exceptions.Timeout: - logger.exception("Timeout when trying to post layer data response for %s", layer.id) - raise AnalyzeLayerRetryException - except requests.exceptions.ConnectionError: - logger.exception( - "Connection error when trying to post layer data response for %s", layer.id - ) - raise AnalyzeLayerRetryException - except (requests.exceptions.RequestException) as re: - logger.exception("Failed to post layer data response for %s: %s", layer.id, re) - raise AnalyzeLayerException - except Non200ResponseException as ex: - message = _response_json(request, ex.response).get("Error").get("Message", "") - logger.warning( - "A warning event occurred when analyzing layer %s (status code %s): %s", - request["Layer"]["Name"], - ex.response.status_code, - message, - ) - # 400 means the layer could not be analyzed due to a bad request. - if ex.response.status_code == 400: - if message == UNKNOWN_PARENT_LAYER_ERROR_MSG: - raise MissingParentLayerException( - "Bad request to security scanner: %s" % message - ) - else: - logger.exception("Got non-200 response for analyze of layer %s", layer.id) - raise AnalyzeLayerException("Bad request to security scanner: %s" % message) - # 422 means that the layer could not be analyzed: - # - the layer could not be extracted (might be a manifest or an invalid .tar.gz) - # - the layer operating system / package manager is unsupported - elif ex.response.status_code == 422: - raise InvalidLayerException - - # Otherwise, it is some other error and we should retry. - raise AnalyzeLayerRetryException - - # Return the parsed API version. - return _response_json(request, response)["Layer"]["IndexedByVersion"] - def check_layer_vulnerable(self, layer_id, cve_name): """ Checks to see if the layer with the given ID is vulnerable to the specified CVE. @@ -459,51 +204,6 @@ class ImplementedSecurityScannerAPI(SecurityScannerAPIInterface): return False - def get_notification(self, notification_name, layer_limit=100, page=None): - """ - Gets the data for a specific notification, with optional page token. - - Returns a tuple of the data (None on failure) and whether to retry. 
- """ - try: - params = {"limit": layer_limit} - - if page is not None: - params["page"] = page - - response = self._call( - "GET", _API_METHOD_GET_NOTIFICATION % notification_name, params=params - ) - json_response = response.json() - except requests.exceptions.Timeout: - logger.exception("Timeout when trying to get notification for %s", notification_name) - return None, True - except requests.exceptions.ConnectionError: - logger.exception( - "Connection error when trying to get notification for %s", notification_name - ) - return None, True - except (requests.exceptions.RequestException, ValueError): - logger.exception("Failed to get notification for %s", notification_name) - return None, False - except Non200ResponseException as ex: - return None, ex.response.status_code != 404 and ex.response.status_code != 400 - - return json_response, False - - def mark_notification_read(self, notification_name): - """ - Marks a security scanner notification as read. - """ - try: - self._call("DELETE", _API_METHOD_MARK_NOTIFICATION_READ % notification_name) - return True - except Non200ResponseException: - return False - except requests.exceptions.RequestException: - logger.exception("Failed to mark notification as read: %s", notification_name) - return False - def get_layer_data(self, layer, include_features=False, include_vulnerabilities=False): """ Returns the layer data for the specified layer. diff --git a/util/secscan/fake.py b/util/secscan/fake.py index 1cba85d35..696762fc2 100644 --- a/util/secscan/fake.py +++ b/util/secscan/fake.py @@ -32,7 +32,6 @@ class FakeSecurityScanner(object): self.hostname = hostname self.index_version = index_version self.layers = {} - self.notifications = {} self.layer_vulns = {} self.ok_layer_id = None @@ -84,42 +83,6 @@ class FakeSecurityScanner(object): """ return layer_id in self.layers - def has_notification(self, notification_id): - """ - Returns whether a notification with the given ID is found in the scanner. - """ - return notification_id in self.notifications - - def add_notification( - self, - old_layer_ids, - new_layer_ids, - old_vuln, - new_vuln, - max_per_page=100, - indexed_old_layer_ids=None, - indexed_new_layer_ids=None, - ): - """ - Adds a new notification over the given sets of layer IDs and vulnerability information, - returning the structural data of the notification created. - """ - notification_id = str(uuid.uuid4()) - if old_vuln is None: - old_vuln = dict(new_vuln) - - self.notifications[notification_id] = dict( - old_layer_ids=old_layer_ids, - new_layer_ids=new_layer_ids, - old_vuln=old_vuln, - new_vuln=new_vuln, - max_per_page=max_per_page, - indexed_old_layer_ids=indexed_old_layer_ids, - indexed_new_layer_ids=indexed_new_layer_ids, - ) - - return self._get_notification_data(notification_id, 0, 100) - def layer_id(self, layer): """ Returns the Quay Security Scanner layer ID for the given layer (Image row). @@ -162,62 +125,6 @@ class FakeSecurityScanner(object): } ) - def _get_notification_data(self, notification_id, page, limit): - """ - Returns the structural data for the notification with the given ID, paginated using the - given page and limit. 
- """ - notification = self.notifications[notification_id] - limit = min(limit, notification["max_per_page"]) - - notification_data = { - "Name": notification_id, - "Created": "1456247389", - "Notified": "1456246708", - "Limit": limit, - } - - start_index = page * limit - end_index = (page + 1) * limit - has_additional_page = False - - if notification.get("old_vuln"): - old_layer_ids = notification["old_layer_ids"] - old_layer_ids = old_layer_ids[start_index:end_index] - has_additional_page = has_additional_page or bool(len(old_layer_ids[end_index - 1 :])) - - notification_data["Old"] = { - "Vulnerability": notification["old_vuln"], - "LayersIntroducingVulnerability": old_layer_ids, - } - - if notification.get("indexed_old_layer_ids", None): - indexed_old_layer_ids = notification["indexed_old_layer_ids"][start_index:end_index] - notification_data["Old"][ - "OrderedLayersIntroducingVulnerability" - ] = indexed_old_layer_ids - - if notification.get("new_vuln"): - new_layer_ids = notification["new_layer_ids"] - new_layer_ids = new_layer_ids[start_index:end_index] - has_additional_page = has_additional_page or bool(len(new_layer_ids[end_index - 1 :])) - - notification_data["New"] = { - "Vulnerability": notification["new_vuln"], - "LayersIntroducingVulnerability": new_layer_ids, - } - - if notification.get("indexed_new_layer_ids", None): - indexed_new_layer_ids = notification["indexed_new_layer_ids"][start_index:end_index] - notification_data["New"][ - "OrderedLayersIntroducingVulnerability" - ] = indexed_new_layer_ids - - if has_additional_page: - notification_data["NextPage"] = str(page + 1) - - return notification_data - def get_endpoints(self): """ Returns the HTTMock endpoint definitions for the fake security scanner. @@ -338,43 +245,6 @@ class FakeSecurityScanner(object): "content": json.dumps({"Layer": self.layers[layer["Name"]],}), } - @urlmatch( - netloc=r"(.*\.)?" + self.hostname, path=r"/v1/notifications/(.+)$", method="DELETE" - ) - def delete_notification(url, _): - notification_id = url.path[len("/v1/notifications/") :] - if notification_id not in self.notifications: - return { - "status_code": 404, - "content": json.dumps({"Error": {"Message": "Unknown notification"}}), - } - - self.notifications.pop(notification_id) - return { - "status_code": 204, - "content": "", - } - - @urlmatch(netloc=r"(.*\.)?" + self.hostname, path=r"/v1/notifications/(.+)$", method="GET") - def get_notification(url, _): - notification_id = url.path[len("/v1/notifications/") :] - if notification_id not in self.notifications: - return { - "status_code": 404, - "content": json.dumps({"Error": {"Message": "Unknown notification"}}), - } - - query_params = urllib.parse.parse_qs(url.query) - limit = int(query_params.get("limit", [2])[0]) - page = int(query_params.get("page", [0])[0]) - - notification_data = self._get_notification_data(notification_id, page, limit) - response = {"Notification": notification_data} - return { - "status_code": 200, - "content": json.dumps(response), - } - @urlmatch(netloc=r"(.*\.)?" 
+ self.hostname, path=r"/v1/metrics$", method="GET") def metrics(url, _): return { @@ -393,8 +263,6 @@ class FakeSecurityScanner(object): get_layer_mock, post_layer_mock, remove_layer_mock, - get_notification, - delete_notification, metrics, response_content, ] diff --git a/util/secscan/notifier.py b/util/secscan/notifier.py deleted file mode 100644 index 9d1db5df1..000000000 --- a/util/secscan/notifier.py +++ /dev/null @@ -1,205 +0,0 @@ -import logging -import sys - -from collections import defaultdict -from enum import Enum - -from data.registry_model import registry_model -from notifications import notification_batch -from util.secscan import PRIORITY_LEVELS -from util.secscan.api import APIRequestFailure -from util.morecollections import AttrDict, StreamingDiffTracker, IndexedStreamingDiffTracker - - -logger = logging.getLogger(__name__) - - -class ProcessNotificationPageResult(Enum): - FINISHED_PAGE = "Finished Page" - FINISHED_PROCESSING = "Finished Processing" - FAILED = "Failed" - - -class SecurityNotificationHandler(object): - """ - Class to process paginated notifications from the security scanner and issue Quay - vulnerability_found notifications for all necessary tags. Callers should initialize, call - process_notification_page_data for each page until it returns FINISHED_PROCESSING or FAILED and, - if succeeded, then call send_notifications to send out the notifications queued. - - NOTE: This is legacy code and should be removed once we're fully moved to Clair V4. - """ - - def __init__(self, legacy_secscan_api, results_per_stream): - self.tags_by_repository_map = defaultdict(set) - self.repository_map = {} - self.check_map = {} - self.layer_ids = set() - self.legacy_secscan_api = legacy_secscan_api - - self.stream_tracker = None - self.results_per_stream = results_per_stream - self.vulnerability_info = None - - def send_notifications(self): - """ - Sends all queued up notifications. - """ - if self.vulnerability_info is None: - return - - new_vuln = self.vulnerability_info - new_severity = PRIORITY_LEVELS.get( - new_vuln.get("Severity", "Unknown"), {"index": sys.maxsize} - ) - - # For each of the tags found, issue a notification. - with notification_batch() as spawn_notification: - for repository_id, tags in self.tags_by_repository_map.items(): - event_data = { - "tags": list(tags), - "vulnerability": { - "id": new_vuln["Name"], - "description": new_vuln.get("Description", None), - "link": new_vuln.get("Link", None), - "priority": new_severity["title"], - "has_fix": "FixedIn" in new_vuln, - }, - } - - spawn_notification( - self.repository_map[repository_id], "vulnerability_found", event_data - ) - - def process_notification_page_data(self, notification_page_data): - """ - Processes the given notification page data to spawn vulnerability notifications as - necessary. - - Returns the status of the processing. 
- """ - if not "New" in notification_page_data: - return self._done() - - new_data = notification_page_data["New"] - old_data = notification_page_data.get("Old", {}) - - new_vuln = new_data["Vulnerability"] - old_vuln = old_data.get("Vulnerability", {}) - - self.vulnerability_info = new_vuln - - new_layer_ids = new_data.get("LayersIntroducingVulnerability", []) - old_layer_ids = old_data.get("LayersIntroducingVulnerability", []) - - new_severity = PRIORITY_LEVELS.get( - new_vuln.get("Severity", "Unknown"), {"index": sys.maxsize} - ) - old_severity = PRIORITY_LEVELS.get( - old_vuln.get("Severity", "Unknown"), {"index": sys.maxsize} - ) - - # Check if the severity of the vulnerability has increased. If so, then we report this - # vulnerability for *all* layers, rather than a difference, as it is important for everyone. - if new_severity["index"] < old_severity["index"]: - # The vulnerability has had its severity increased. Report for *all* layers. - all_layer_ids = set(new_layer_ids) | set(old_layer_ids) - for layer_id in all_layer_ids: - self._report(layer_id) - - if "NextPage" not in notification_page_data: - return self._done() - else: - return ProcessNotificationPageResult.FINISHED_PAGE - - # Otherwise, only send the notification to new layers. To find only the new layers, we - # need to do a streaming diff vs the old layer IDs stream. - - # Check for ordered data. If found, we use the indexed tracker, which is faster and - # more memory efficient. - is_indexed = False - if ( - "OrderedLayersIntroducingVulnerability" in new_data - or "OrderedLayersIntroducingVulnerability" in old_data - ): - - def tuplize(stream): - return [(entry["LayerName"], entry["Index"]) for entry in stream] - - new_layer_ids = tuplize(new_data.get("OrderedLayersIntroducingVulnerability", [])) - old_layer_ids = tuplize(old_data.get("OrderedLayersIntroducingVulnerability", [])) - is_indexed = True - - # If this is the first call, initialize the tracker. - if self.stream_tracker is None: - self.stream_tracker = ( - IndexedStreamingDiffTracker(self._report, self.results_per_stream) - if is_indexed - else StreamingDiffTracker(self._report, self.results_per_stream) - ) - - # Call to add the old and new layer ID streams to the tracker. The tracker itself will - # call _report whenever it has determined a new layer has been found. - self.stream_tracker.push_new(new_layer_ids) - self.stream_tracker.push_old(old_layer_ids) - - # Check to see if there are any additional pages to process. - if "NextPage" not in notification_page_data: - return self._done() - else: - return ProcessNotificationPageResult.FINISHED_PAGE - - def _done(self): - if self.stream_tracker is not None: - # Mark the tracker as done, so that it finishes reporting any outstanding layers. - self.stream_tracker.done() - - # Process all the layers. - if self.vulnerability_info is not None: - if not self._process_layers(): - return ProcessNotificationPageResult.FAILED - - return ProcessNotificationPageResult.FINISHED_PROCESSING - - def _report(self, new_layer_id): - self.layer_ids.add(new_layer_id) - - def _chunk(self, pairs, chunk_size): - start_index = 0 - while start_index < len(pairs): - yield pairs[start_index:chunk_size] - start_index += chunk_size - - def _process_layers(self): - cve_id = self.vulnerability_info["Name"] - - # Builds the pairs of layer ID and storage uuid. - pairs = [tuple(layer_id.split(".", 2)) for layer_id in self.layer_ids] - - # Find the matching tags. 
- for current_pairs in self._chunk(pairs, 50): - tags = list(registry_model.yield_tags_for_vulnerability_notification(current_pairs)) - for tag in tags: - # Verify that the tag's *top layer* has the vulnerability. - if not tag.layer_id in self.check_map: - logger.debug("Checking if layer %s is vulnerable to %s", tag.layer_id, cve_id) - try: - self.check_map[ - tag.layer_id - ] = self.legacy_secscan_api.check_layer_vulnerable(tag.layer_id, cve_id) - except APIRequestFailure: - return False - - logger.debug( - "Result of layer %s is vulnerable to %s check: %s", - tag.layer_id, - cve_id, - self.check_map[tag.layer_id], - ) - - if self.check_map[tag.layer_id]: - # Add the vulnerable tag to the list. - self.tags_by_repository_map[tag.repository.id].add(tag.name) - self.repository_map[tag.repository.id] = tag.repository - - return True diff --git a/util/secscan/v4/test/test_secscan.py b/util/secscan/v4/test/test_secscan.py index fc3ecf5cc..d91116fc1 100644 --- a/util/secscan/v4/test/test_secscan.py +++ b/util/secscan/v4/test/test_secscan.py @@ -18,9 +18,8 @@ from app import app def manifest_for(namespace, repository, tagname): repository_ref = registry_model.lookup_repository(namespace, repository) - tag = registry_model.get_repo_tag(repository_ref, tagname, include_legacy_image=True) - - return registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + tag = registry_model.get_repo_tag(repository_ref, tagname) + return registry_model.get_manifest_for_tag(tag) @pytest.fixture() diff --git a/util/security/signing.py b/util/security/signing.py deleted file mode 100644 index 2eb7c6880..000000000 --- a/util/security/signing.py +++ /dev/null @@ -1,87 +0,0 @@ -import gpg -import features -import logging -from io import BytesIO - - -logger = logging.getLogger(__name__) - - -class GPG2Signer(object): - """ - Helper class for signing data using GPG2. - """ - - def __init__(self, config, config_provider): - if not config.get("GPG2_PRIVATE_KEY_NAME"): - raise Exception("Missing configuration key GPG2_PRIVATE_KEY_NAME") - - if not config.get("GPG2_PRIVATE_KEY_FILENAME"): - raise Exception("Missing configuration key GPG2_PRIVATE_KEY_FILENAME") - - if not config.get("GPG2_PUBLIC_KEY_FILENAME"): - raise Exception("Missing configuration key GPG2_PUBLIC_KEY_FILENAME") - - self._ctx = gpg.Context() - self._ctx.armor = True - self._private_key_name = config["GPG2_PRIVATE_KEY_NAME"] - self._public_key_filename = config["GPG2_PUBLIC_KEY_FILENAME"] - self._config_provider = config_provider - - if not config_provider.volume_file_exists(config["GPG2_PRIVATE_KEY_FILENAME"]): - raise Exception("Missing key file %s" % config["GPG2_PRIVATE_KEY_FILENAME"]) - - with config_provider.get_volume_file(config["GPG2_PRIVATE_KEY_FILENAME"], mode="rb") as fp: - self._ctx.op_import(fp) - - @property - def name(self): - return "gpg2" - - def open_public_key_file(self): - return self._config_provider.get_volume_file(self._public_key_filename, mode="rb") - - def detached_sign(self, stream): - """ - Signs the given byte-like stream, returning the signature. 
- """ - ctx = self._ctx - try: - ctx.signers = [ctx.get_key(self._private_key_name, 0)] - except: - raise Exception("Invalid private key name") - - data = stream.read() - if not isinstance(data, bytes): - raise TypeError("Stream is not byte-like") - - sign_res = ctx.sign(data, mode=gpg.constants.sig.mode.DETACH) - return sign_res[0] - - -class Signer(object): - def __init__(self, app=None, config_provider=None): - self.app = app - if app is not None: - self.state = self.init_app(app, config_provider) - else: - self.state = None - - def init_app(self, app, config_provider): - preference = app.config.get("SIGNING_ENGINE", None) - if preference is None: - return None - - if not features.ACI_CONVERSION: - return None - - try: - return SIGNING_ENGINES[preference](app.config, config_provider) - except: - logger.exception("Could not initialize signing engine") - - def __getattr__(self, name): - return getattr(self.state, name, None) - - -SIGNING_ENGINES = {"gpg2": GPG2Signer} diff --git a/util/security/test/test_signing.py b/util/security/test/test_signing.py deleted file mode 100644 index 2965dae02..000000000 --- a/util/security/test/test_signing.py +++ /dev/null @@ -1,28 +0,0 @@ -import pytest -from io import StringIO, BytesIO - -from app import app, config_provider -from util.security.signing import Signer - - -@pytest.fixture(params=["gpg2"]) -def signer(request): - app.config["SIGNING_ENGINE"] = request.param - return Signer(app, config_provider) - - -@pytest.mark.parametrize( - "data, expected_exception", - [ - ("Unicode strings not allowed", AttributeError), - (StringIO("Not OK, because this does not implement buffer protocol"), TypeError), - (b"bytes are not ok. It should be wrapped in a file-like object", AttributeError), - (BytesIO(b"Thisisfine"), None), - ], -) -def test_detached_sign(data, expected_exception, signer): - if expected_exception is not None: - with pytest.raises(expected_exception): - signer.detached_sign(data) - else: - signer.detached_sign(data) diff --git a/util/test/test_workers.py b/util/test/test_workers.py index 9a8796043..78fb8a493 100644 --- a/util/test/test_workers.py +++ b/util/test/test_workers.py @@ -28,10 +28,6 @@ from util.workers import get_worker_count ("registry", {"WORKER_COUNT": 1,}, [0, 1], 10, 8, 64, 8), # Override always uses specific first. ("registry", {"WORKER_COUNT_REGISTRY": 120, "WORKER_COUNT": 12,}, [0, 1], 10, 8, 64, 120), - # Non-matching override. - ("verbs", {"WORKER_COUNT_REGISTRY": 120,}, [0, 1], 10, 8, 64, 20), - # Zero worker count (use defaults). - ("verbs", {"WORKER_COUNT": 0,}, [0, 1], 10, 8, 64, 8), ], ) def test_get_worker_count( diff --git a/util/vendor/__init__.py b/util/vendor/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/util/vendor/paxtarfile.py b/util/vendor/paxtarfile.py deleted file mode 100644 index 5e13c9303..000000000 --- a/util/vendor/paxtarfile.py +++ /dev/null @@ -1,2885 +0,0 @@ -# -*- coding: iso-8859-1 -*- - -# This version of tarfile was taken from python 2.7.10, and amended -# to fix a problem trying to decode non-text header fields present -# in some tar files using pax headers and/or extended attributes. - -# ------------------------------------------------------------------- -# tarfile.py -# ------------------------------------------------------------------- -# Copyright (C) 2002 Lars Gustäbel -# All rights reserved. 
-# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without -# restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following -# conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -""" -Read from and write to tar format archives. -""" - -__version__ = "$Revision: 85213 $" -# $Source$ - -version = "0.9.0" -__author__ = "Lars Gustäbel (lars@gustaebel.de)" -__date__ = "$Date$" -__cvsid__ = "$Id$" -__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend." - -# --------- -# Imports -# --------- -from builtins import open as bltn_open -import sys -import os -import shutil -import stat -import errno -import time -import struct -import copy -import re -import operator - -try: - import grp, pwd -except ImportError: - grp = pwd = None - -# from tarfile import * -__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] - -# --------------------------------------------------------- -# tar constants -# --------------------------------------------------------- -NUL = "\0" # the null character -BLOCKSIZE = 512 # length of processing blocks -RECORDSIZE = BLOCKSIZE * 20 # length of records -GNU_MAGIC = "ustar \0" # magic gnu tar string -POSIX_MAGIC = "ustar\x0000" # magic posix tar string - -LENGTH_NAME = 100 # maximum length of a filename -LENGTH_LINK = 100 # maximum length of a linkname -LENGTH_PREFIX = 155 # maximum length of the prefix field - -REGTYPE = "0" # regular file -AREGTYPE = "\0" # regular file -LNKTYPE = "1" # link (inside tarfile) -SYMTYPE = "2" # symbolic link -CHRTYPE = "3" # character special device -BLKTYPE = "4" # block special device -DIRTYPE = "5" # directory -FIFOTYPE = "6" # fifo special device -CONTTYPE = "7" # contiguous file - -GNUTYPE_LONGNAME = "L" # GNU tar longname -GNUTYPE_LONGLINK = "K" # GNU tar longlink -GNUTYPE_SPARSE = "S" # GNU tar sparse file - -XHDTYPE = "x" # POSIX.1-2001 extended header -XGLTYPE = "g" # POSIX.1-2001 global header -SOLARIS_XHDTYPE = "X" # Solaris extended header - -USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format -GNU_FORMAT = 1 # GNU tar format -PAX_FORMAT = 2 # POSIX.1-2001 (pax) format -DEFAULT_FORMAT = GNU_FORMAT - -# --------------------------------------------------------- -# tarfile constants -# --------------------------------------------------------- -# File types that tarfile supports: -SUPPORTED_TYPES = ( - REGTYPE, - AREGTYPE, - LNKTYPE, - SYMTYPE, - DIRTYPE, - FIFOTYPE, - CONTTYPE, - CHRTYPE, - BLKTYPE, - GNUTYPE_LONGNAME, - GNUTYPE_LONGLINK, - GNUTYPE_SPARSE, -) - -# File types that will be treated as a regular file. -REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE) - -# File types that are part of the GNU tar format. 
-GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE) - -# Fields from a pax header that override a TarInfo attribute. -PAX_FIELDS = ("path", "linkpath", "size", "mtime", "uid", "gid", "uname", "gname", "SCHILY.xattr.") - -# Fields in a pax header that are numbers, all other fields -# are treated as strings. -PAX_NUMBER_FIELDS = { - "atime": float, - "ctime": float, - "mtime": float, - "uid": int, - "gid": int, - "size": int, - "SCHILY.xattr.": dict, -} - -# --------------------------------------------------------- -# Bits used in the mode field, values in octal. -# --------------------------------------------------------- -S_IFLNK = 0o120000 # symbolic link -S_IFREG = 0o100000 # regular file -S_IFBLK = 0o060000 # block device -S_IFDIR = 0o040000 # directory -S_IFCHR = 0o020000 # character device -S_IFIFO = 0o010000 # fifo - -TSUID = 0o4000 # set UID on execution -TSGID = 0o2000 # set GID on execution -TSVTX = 0o1000 # reserved - -TUREAD = 0o400 # read by owner -TUWRITE = 0o200 # write by owner -TUEXEC = 0o100 # execute/search by owner -TGREAD = 0o040 # read by group -TGWRITE = 0o020 # write by group -TGEXEC = 0o010 # execute/search by group -TOREAD = 0o004 # read by other -TOWRITE = 0o002 # write by other -TOEXEC = 0o001 # execute/search by other - -# --------------------------------------------------------- -# initialization -# --------------------------------------------------------- -ENCODING = sys.getfilesystemencoding() -if ENCODING is None: - ENCODING = sys.getdefaultencoding() - -# --------------------------------------------------------- -# Some useful functions -# --------------------------------------------------------- - - -def stn(s, length): - """ - Convert a python string to a null-terminated string buffer. - """ - return s[:length] + (length - len(s)) * NUL - - -def nts(s): - """ - Convert a null-terminated string field to a python string. - """ - # Use the string up to the first null char. - p = s.find("\0") - if p == -1: - return s - return s[:p] - - -def nti(s): - """ - Convert a number field to a python number. - """ - # There are two possible encodings for a number field, see - # itn() below. - if s[0] != chr(0o200): - try: - n = int(nts(s).strip() or "0", 8) - except ValueError: - raise InvalidHeaderError("invalid header") - else: - n = 0 - for i in range(len(s) - 1): - n <<= 8 - n += ord(s[i + 1]) - return n - - -def itn(n, digits=8, format=DEFAULT_FORMAT): - """ - Convert a python number to a number field. - """ - # POSIX 1003.1-1988 requires numbers to be encoded as a string of - # octal digits followed by a null-byte, this allows values up to - # (8**(digits-1))-1. GNU tar allows storing numbers greater than - # that if necessary. A leading 0200 byte indicates this particular - # encoding, the following digits-1 bytes are a big-endian - # representation. This allows values up to (256**(digits-1))-1. - if 0 <= n < 8 ** (digits - 1): - s = "%0*o" % (digits - 1, n) + NUL - else: - if format != GNU_FORMAT or n >= 256 ** (digits - 1): - raise ValueError("overflow in number field") - - if n < 0: - # XXX We mimic GNU tar's behaviour with negative numbers, - # this could raise OverflowError. - n = struct.unpack("L", struct.pack("l", n))[0] - - s = "" - for i in range(digits - 1): - s = chr(n & 0o377) + s - n >>= 8 - s = chr(0o200) + s - return s - - -def uts(s, encoding, errors): - """ - Convert a unicode object to a string. - """ - if errors == "utf-8": - # An extra error handler similar to the -o invalid=UTF-8 option - # in POSIX.1-2001. 
Replace untranslatable characters with their - # UTF-8 representation. - try: - return s.encode(encoding, "strict") - except UnicodeEncodeError: - x = [] - for c in s: - try: - x.append(c.encode(encoding, "strict")) - except UnicodeEncodeError: - x.append(c.encode("utf8")) - return "".join(x) - else: - return s.encode(encoding, errors) - - -def calc_chksums(buf): - """ - Calculate the checksum for a member's header by summing up all characters except for the chksum - field which is treated as if it was filled with spaces. - - According to the GNU tar sources, some tars (Sun and NeXT) calculate chksum with signed char, - which will be different if there are chars in the buffer with the high bit set. So we calculate - two checksums, unsigned and signed. - """ - unsigned_chksum = 256 + sum( - struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]) - ) - signed_chksum = 256 + sum( - struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]) - ) - return unsigned_chksum, signed_chksum - - -def copyfileobj(src, dst, length=None): - """ - Copy length bytes from fileobj src to fileobj dst. - - If length is None, copy the entire content. - """ - if length == 0: - return - if length is None: - shutil.copyfileobj(src, dst) - return - - BUFSIZE = 16 * 1024 - blocks, remainder = divmod(length, BUFSIZE) - for b in range(blocks): - buf = src.read(BUFSIZE) - if len(buf) < BUFSIZE: - raise IOError("end of file reached") - dst.write(buf) - - if remainder != 0: - buf = src.read(remainder) - if len(buf) < remainder: - raise IOError("end of file reached") - dst.write(buf) - return - - -filemode_table = ( - ( - (S_IFLNK, "l"), - (S_IFREG, "-"), - (S_IFBLK, "b"), - (S_IFDIR, "d"), - (S_IFCHR, "c"), - (S_IFIFO, "p"), - ), - ((TUREAD, "r"),), - ((TUWRITE, "w"),), - ((TUEXEC | TSUID, "s"), (TSUID, "S"), (TUEXEC, "x")), - ((TGREAD, "r"),), - ((TGWRITE, "w"),), - ((TGEXEC | TSGID, "s"), (TSGID, "S"), (TGEXEC, "x")), - ((TOREAD, "r"),), - ((TOWRITE, "w"),), - ((TOEXEC | TSVTX, "t"), (TSVTX, "T"), (TOEXEC, "x")), -) - - -def filemode(mode): - """ - Convert a file's mode to a string of the form. - - -rwxrwxrwx. - Used by TarFile.list() - """ - perm = [] - for table in filemode_table: - for bit, char in table: - if mode & bit == bit: - perm.append(char) - break - else: - perm.append("-") - return "".join(perm) - - -class TarError(Exception): - """ - Base exception. - """ - - pass - - -class ExtractError(TarError): - """ - General exception for extract errors. - """ - - pass - - -class ReadError(TarError): - """ - Exception for unreadable tar archives. - """ - - pass - - -class CompressionError(TarError): - """ - Exception for unavailable compression methods. - """ - - pass - - -class StreamError(TarError): - """ - Exception for unsupported operations on stream-like TarFiles. - """ - - pass - - -class HeaderError(TarError): - """ - Base exception for header errors. - """ - - pass - - -class EmptyHeaderError(HeaderError): - """ - Exception for empty headers. - """ - - pass - - -class TruncatedHeaderError(HeaderError): - """ - Exception for truncated headers. - """ - - pass - - -class EOFHeaderError(HeaderError): - """ - Exception for end of file headers. - """ - - pass - - -class InvalidHeaderError(HeaderError): - """ - Exception for invalid headers. - """ - - pass - - -class SubsequentHeaderError(HeaderError): - """ - Exception for missing and invalid extended headers. 
- """ - - pass - - -# --------------------------- -# internal stream interface -# --------------------------- -class _LowLevelFile: - """ - Low-level file object. - - Supports reading and writing. It is used instead of a regular file object for streaming access. - """ - - def __init__(self, name, mode): - mode = {"r": os.O_RDONLY, "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,}[mode] - if hasattr(os, "O_BINARY"): - mode |= os.O_BINARY - self.fd = os.open(name, mode, 0o666) - - def close(self): - os.close(self.fd) - - def read(self, size): - return os.read(self.fd, size) - - def write(self, s): - os.write(self.fd, s) - - -class _Stream: - """ - Class that serves as an adapter between TarFile and a stream-like object. The stream-like - object only needs to have a read() or write() method and is accessed blockwise. Use of gzip or - bzip2 compression is possible. A stream-like object could be for example: sys.stdin, sys.stdout, - a socket, a tape device etc. - - _Stream is intended to be used only internally. - """ - - def __init__(self, name, mode, comptype, fileobj, bufsize): - """ - Construct a _Stream object. - """ - self._extfileobj = True - if fileobj is None: - fileobj = _LowLevelFile(name, mode) - self._extfileobj = False - - if comptype == "*": - # Enable transparent compression detection for the - # stream interface - fileobj = _StreamProxy(fileobj) - comptype = fileobj.getcomptype() - - self.name = name or "" - self.mode = mode - self.comptype = comptype - self.fileobj = fileobj - self.bufsize = bufsize - self.buf = "" - self.pos = 0 - self.closed = False - - try: - if comptype == "gz": - try: - import zlib - except ImportError: - raise CompressionError("zlib module is not available") - self.zlib = zlib - self.crc = zlib.crc32("") & 0xFFFFFFFF - if mode == "r": - self._init_read_gz() - else: - self._init_write_gz() - - elif comptype == "bz2": - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - if mode == "r": - self.dbuf = "" - self.cmp = bz2.BZ2Decompressor() - else: - self.cmp = bz2.BZ2Compressor() - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - def __del__(self): - if hasattr(self, "closed") and not self.closed: - self.close() - - def _init_write_gz(self): - """ - Initialize for writing with gzip compression. - """ - self.cmp = self.zlib.compressobj( - 9, self.zlib.DEFLATED, -self.zlib.MAX_WBITS, self.zlib.DEF_MEM_LEVEL, 0 - ) - timestamp = struct.pack(" self.bufsize: - self.fileobj.write(self.buf[: self.bufsize]) - self.buf = self.buf[self.bufsize :] - - def close(self): - """ - Close the _Stream object. - - No operation should be done on it afterwards. - """ - if self.closed: - return - - self.closed = True - try: - if self.mode == "w" and self.comptype != "tar": - self.buf += self.cmp.flush() - - if self.mode == "w" and self.buf: - self.fileobj.write(self.buf) - self.buf = "" - if self.comptype == "gz": - # The native zlib crc is an unsigned 32-bit integer, but - # the Python wrapper implicitly casts that to a signed C - # long. So, on a 32-bit box self.crc may "look negative", - # while the same crc on a 64-bit box may "look positive". - # To avoid irksome warnings from the `struct` module, force - # it to look positive on all boxes. 
- self.fileobj.write(struct.pack("= 0: - blocks, remainder = divmod(pos - self.pos, self.bufsize) - for i in range(blocks): - self.read(self.bufsize) - self.read(remainder) - else: - raise StreamError("seeking backwards is not allowed") - return self.pos - - def read(self, size=None): - """ - Return the next size number of bytes from the stream. - - If size is not defined, return all bytes of the stream up to EOF. - """ - if size is None: - t = [] - while True: - buf = self._read(self.bufsize) - if not buf: - break - t.append(buf) - buf = "".join(t) - else: - buf = self._read(size) - self.pos += len(buf) - return buf - - def _read(self, size): - """ - Return size bytes from the stream. - """ - if self.comptype == "tar": - return self.__read(size) - - c = len(self.dbuf) - t = [self.dbuf] - while c < size: - buf = self.__read(self.bufsize) - if not buf: - break - try: - buf = self.cmp.decompress(buf) - except IOError: - raise ReadError("invalid compressed data") - t.append(buf) - c += len(buf) - t = "".join(t) - self.dbuf = t[size:] - return t[:size] - - def __read(self, size): - """ - Return size bytes from stream. - - If internal buffer is empty, read another block from the stream. - """ - c = len(self.buf) - t = [self.buf] - while c < size: - buf = self.fileobj.read(self.bufsize) - if not buf: - break - t.append(buf) - c += len(buf) - t = "".join(t) - self.buf = t[size:] - return t[:size] - - -# class _Stream - - -class _StreamProxy(object): - """ - Small proxy class that enables transparent compression detection for the Stream interface (mode - 'r|*'). - """ - - def __init__(self, fileobj): - self.fileobj = fileobj - self.buf = self.fileobj.read(BLOCKSIZE) - - def read(self, size): - self.read = self.fileobj.read - return self.buf - - def getcomptype(self): - if self.buf.startswith("\037\213\010"): - return "gz" - if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY": - return "bz2" - return "tar" - - def close(self): - self.fileobj.close() - - -# class StreamProxy - - -class _BZ2Proxy(object): - """ - Small proxy class that enables external file object support for "r:bz2" and "w:bz2" modes. - - This is actually a workaround for a limitation in bz2 module's BZ2File class which (unlike - gzip.GzipFile) has no support for a file object argument. 
- """ - - blocksize = 16 * 1024 - - def __init__(self, fileobj, mode): - self.fileobj = fileobj - self.mode = mode - self.name = getattr(self.fileobj, "name", None) - self.init() - - def init(self): - import bz2 - - self.pos = 0 - if self.mode == "r": - self.bz2obj = bz2.BZ2Decompressor() - self.fileobj.seek(0) - self.buf = "" - else: - self.bz2obj = bz2.BZ2Compressor() - - def read(self, size): - b = [self.buf] - x = len(self.buf) - while x < size: - raw = self.fileobj.read(self.blocksize) - if not raw: - break - data = self.bz2obj.decompress(raw) - b.append(data) - x += len(data) - self.buf = "".join(b) - - buf = self.buf[:size] - self.buf = self.buf[size:] - self.pos += len(buf) - return buf - - def seek(self, pos): - if pos < self.pos: - self.init() - self.read(pos - self.pos) - - def tell(self): - return self.pos - - def write(self, data): - self.pos += len(data) - raw = self.bz2obj.compress(data) - self.fileobj.write(raw) - - def close(self): - if self.mode == "w": - raw = self.bz2obj.flush() - self.fileobj.write(raw) - - -# class _BZ2Proxy - -# ------------------------ -# Extraction file object -# ------------------------ -class _FileInFile(object): - """ - A thin wrapper around an existing file object that provides a part of its data as an individual - file object. - """ - - def __init__(self, fileobj, offset, size, sparse=None): - self.fileobj = fileobj - self.offset = offset - self.size = size - self.sparse = sparse - self.position = 0 - - def tell(self): - """ - Return the current file position. - """ - return self.position - - def seek(self, position): - """ - Seek to a position in the file. - """ - self.position = position - - def read(self, size=None): - """ - Read data from the file. - """ - if size is None: - size = self.size - self.position - else: - size = min(size, self.size - self.position) - - if self.sparse is None: - return self.readnormal(size) - else: - return self.readsparse(size) - - def __read(self, size): - buf = self.fileobj.read(size) - if len(buf) != size: - raise ReadError("unexpected end of data") - return buf - - def readnormal(self, size): - """ - Read operation for regular files. - """ - self.fileobj.seek(self.offset + self.position) - self.position += size - return self.__read(size) - - def readsparse(self, size): - """ - Read operation for sparse files. - """ - data = [] - while size > 0: - buf = self.readsparsesection(size) - if not buf: - break - size -= len(buf) - data.append(buf) - return "".join(data) - - def readsparsesection(self, size): - """ - Read a single section of a sparse file. - """ - section = self.sparse.find(self.position) - - if section is None: - return "" - - size = min(size, section.offset + section.size - self.position) - - if isinstance(section, _data): - realpos = section.realpos + self.position - section.offset - self.fileobj.seek(self.offset + realpos) - self.position += size - return self.__read(size) - else: - self.position += size - return NUL * size - - -# class _FileInFile - - -class ExFileObject(object): - """ - File-like object for reading an archive member. - - Is returned by TarFile.extractfile(). - """ - - blocksize = 1024 - - def __init__(self, tarfile, tarinfo): - self.fileobj = _FileInFile( - tarfile.fileobj, tarinfo.offset_data, tarinfo.size, getattr(tarinfo, "sparse", None) - ) - self.name = tarinfo.name - self.mode = "r" - self.closed = False - self.size = tarinfo.size - - self.position = 0 - self.buffer = "" - - def read(self, size=None): - """ - Read at most size bytes from the file. 
- - If size is not present or None, read all data until EOF is reached. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - buf = "" - if self.buffer: - if size is None: - buf = self.buffer - self.buffer = "" - else: - buf = self.buffer[:size] - self.buffer = self.buffer[size:] - - if size is None: - buf += self.fileobj.read() - else: - buf += self.fileobj.read(size - len(buf)) - - self.position += len(buf) - return buf - - def readline(self, size=-1): - """ - Read one entire line from the file. - - If size is present and non-negative, return a string with at most that size, which may be an - incomplete line. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if "\n" in self.buffer: - pos = self.buffer.find("\n") + 1 - else: - buffers = [self.buffer] - while True: - buf = self.fileobj.read(self.blocksize) - buffers.append(buf) - if not buf or "\n" in buf: - self.buffer = "".join(buffers) - pos = self.buffer.find("\n") + 1 - if pos == 0: - # no newline found. - pos = len(self.buffer) - break - - if size != -1: - pos = min(size, pos) - - buf = self.buffer[:pos] - self.buffer = self.buffer[pos:] - self.position += len(buf) - return buf - - def readlines(self): - """ - Return a list with all remaining lines. - """ - result = [] - while True: - line = self.readline() - if not line: - break - result.append(line) - return result - - def tell(self): - """ - Return the current file position. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - return self.position - - def seek(self, pos, whence=os.SEEK_SET): - """ - Seek to a position in the file. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if whence == os.SEEK_SET: - self.position = min(max(pos, 0), self.size) - elif whence == os.SEEK_CUR: - if pos < 0: - self.position = max(self.position + pos, 0) - else: - self.position = min(self.position + pos, self.size) - elif whence == os.SEEK_END: - self.position = max(min(self.size + pos, self.size), 0) - else: - raise ValueError("Invalid argument") - - self.buffer = "" - self.fileobj.seek(self.position) - - def close(self): - """ - Close the file object. - """ - self.closed = True - - def __iter__(self): - """ - Get an iterator over the file's lines. - """ - while True: - line = self.readline() - if not line: - break - yield line - - -# class ExFileObject - -# ------------------ -# Exported Classes -# ------------------ -class TarInfo(object): - """ - Informational class which holds the details about an archive member given by a tar header block. - - TarInfo objects are returned by TarFile.getmember(), TarFile.getmembers() and - TarFile.gettarinfo() and are usually created internally. - """ - - def __init__(self, name=""): - """ - Construct a TarInfo object. - - name is the optional name of the member. - """ - self.name = name # member name - self.mode = 0o644 # file permissions - self.uid = 0 # user id - self.gid = 0 # group id - self.size = 0 # file size - self.mtime = 0 # modification time - self.chksum = 0 # header checksum - self.type = REGTYPE # member type - self.linkname = "" # link name - self.uname = "" # user name - self.gname = "" # group name - self.devmajor = 0 # device major number - self.devminor = 0 # device minor number - - self.offset = 0 # the tar header starts here - self.offset_data = 0 # the file's data starts here - - self.pax_headers = {} # pax header information - - # In pax headers the "name" and "linkname" field are called - # "path" and "linkpath". 
- def _getpath(self): - return self.name - - def _setpath(self, name): - self.name = name - - path = property(_getpath, _setpath) - - def _getlinkpath(self): - return self.linkname - - def _setlinkpath(self, linkname): - self.linkname = linkname - - linkpath = property(_getlinkpath, _setlinkpath) - - def __repr__(self): - return "<%s %r at %#x>" % (self.__class__.__name__, self.name, id(self)) - - def get_info(self, encoding, errors): - """ - Return the TarInfo's attributes as a dictionary. - """ - info = { - "name": self.name, - "mode": self.mode & 0o7777, - "uid": self.uid, - "gid": self.gid, - "size": self.size, - "mtime": self.mtime, - "chksum": self.chksum, - "type": self.type, - "linkname": self.linkname, - "uname": self.uname, - "gname": self.gname, - "devmajor": self.devmajor, - "devminor": self.devminor, - } - - if info["type"] == DIRTYPE and not info["name"].endswith("/"): - info["name"] += "/" - - for key in ("name", "linkname", "uname", "gname"): - if type(info[key]) is str: - info[key] = info[key].encode(encoding, errors) - - return info - - def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"): - """ - Return a tar header as a string of 512 byte blocks. - """ - info = self.get_info(encoding, errors) - - if format == USTAR_FORMAT: - return self.create_ustar_header(info) - elif format == GNU_FORMAT: - return self.create_gnu_header(info) - elif format == PAX_FORMAT: - return self.create_pax_header(info, encoding, errors) - else: - raise ValueError("invalid format") - - def create_ustar_header(self, info): - """ - Return the object as a ustar header block. - """ - info["magic"] = POSIX_MAGIC - - if len(info["linkname"]) > LENGTH_LINK: - raise ValueError("linkname is too long") - - if len(info["name"]) > LENGTH_NAME: - info["prefix"], info["name"] = self._posix_split_name(info["name"]) - - return self._create_header(info, USTAR_FORMAT) - - def create_gnu_header(self, info): - """ - Return the object as a GNU header block sequence. - """ - info["magic"] = GNU_MAGIC - - buf = "" - if len(info["linkname"]) > LENGTH_LINK: - buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK) - - if len(info["name"]) > LENGTH_NAME: - buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME) - - return buf + self._create_header(info, GNU_FORMAT) - - def create_pax_header(self, info, encoding, errors): - """ - Return the object as a ustar header block. - - If it cannot be represented this way, prepend a pax extended header sequence with supplement - information. - """ - info["magic"] = POSIX_MAGIC - pax_headers = self.pax_headers.copy() - - # Test string fields for values that exceed the field length or cannot - # be represented in ASCII encoding. - for name, hname, length in ( - ("name", "path", LENGTH_NAME), - ("linkname", "linkpath", LENGTH_LINK), - ("uname", "uname", 32), - ("gname", "gname", 32), - ): - - if hname in pax_headers: - # The pax header has priority. - continue - - val = info[name].decode(encoding, errors) - - # Try to encode the string as ASCII. - try: - val.encode("ascii") - except UnicodeEncodeError: - pax_headers[hname] = val - continue - - if len(info[name]) > length: - pax_headers[hname] = val - - # Test number fields for values that exceed the field limit or values - # that like to be stored as float. - for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): - if name in pax_headers: - # The pax header has priority. Avoid overflow. 
- info[name] = 0 - continue - - val = info[name] - if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): - pax_headers[name] = str(val) - info[name] = 0 - - # Create a pax extended header if necessary. - if pax_headers: - buf = self._create_pax_generic_header(pax_headers) - else: - buf = "" - - return buf + self._create_header(info, USTAR_FORMAT) - - @classmethod - def create_pax_global_header(cls, pax_headers): - """ - Return the object as a pax global header block sequence. - """ - return cls._create_pax_generic_header(pax_headers, type=XGLTYPE) - - def _posix_split_name(self, name): - """ - Split a name longer than 100 chars into a prefix and a name part. - """ - prefix = name[: LENGTH_PREFIX + 1] - while prefix and prefix[-1] != "/": - prefix = prefix[:-1] - - name = name[len(prefix) :] - prefix = prefix[:-1] - - if not prefix or len(name) > LENGTH_NAME: - raise ValueError("name is too long") - return prefix, name - - @staticmethod - def _create_header(info, format): - """ - Return a header block. - - info is a dictionary with file information, format must be one of the *_FORMAT constants. - """ - parts = [ - stn(info.get("name", ""), 100), - itn(info.get("mode", 0) & 0o7777, 8, format), - itn(info.get("uid", 0), 8, format), - itn(info.get("gid", 0), 8, format), - itn(info.get("size", 0), 12, format), - itn(info.get("mtime", 0), 12, format), - " ", # checksum field - info.get("type", REGTYPE), - stn(info.get("linkname", ""), 100), - stn(info.get("magic", POSIX_MAGIC), 8), - stn(info.get("uname", ""), 32), - stn(info.get("gname", ""), 32), - itn(info.get("devmajor", 0), 8, format), - itn(info.get("devminor", 0), 8, format), - stn(info.get("prefix", ""), 155), - ] - - buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts)) - chksum = calc_chksums(buf[-BLOCKSIZE:])[0] - buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] - return buf - - @staticmethod - def _create_payload(payload): - """ - Return the string payload filled with zero bytes up to the next 512 byte border. - """ - blocks, remainder = divmod(len(payload), BLOCKSIZE) - if remainder > 0: - payload += (BLOCKSIZE - remainder) * NUL - return payload - - @classmethod - def _create_gnu_long_header(cls, name, type): - """ - Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence for name. - """ - name += NUL - - info = {} - info["name"] = "././@LongLink" - info["type"] = type - info["size"] = len(name) - info["magic"] = GNU_MAGIC - - # create extended header + name blocks. - return cls._create_header(info, USTAR_FORMAT) + cls._create_payload(name) - - @classmethod - def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE): - """ - Return a POSIX.1-2001 extended or global header sequence that contains a list of keyword, - value pairs. - - The values must be unicode objects. - """ - records = [] - for keyword, value in pax_headers.items(): - keyword = keyword.encode("utf8") - value = value.encode("utf8") - l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' - n = p = 0 - while True: - n = l + len(str(p)) - if n == p: - break - p = n - records.append("%d %s=%s\n" % (p, keyword, value)) - records = "".join(records) - - # We use a hardcoded "././@PaxHeader" name like star does - # instead of the one that POSIX recommends. - info = {} - info["name"] = "././@PaxHeader" - info["type"] = type - info["size"] = len(records) - info["magic"] = POSIX_MAGIC - - # Create pax header + record blocks. 
- return cls._create_header(info, USTAR_FORMAT) + cls._create_payload(records) - - @classmethod - def frombuf(cls, buf): - """ - Construct a TarInfo object from a 512 byte string buffer. - """ - if len(buf) == 0: - raise EmptyHeaderError("empty header") - if len(buf) != BLOCKSIZE: - raise TruncatedHeaderError("truncated header") - if buf.count(NUL) == BLOCKSIZE: - raise EOFHeaderError("end of file header") - - chksum = nti(buf[148:156]) - if chksum not in calc_chksums(buf): - raise InvalidHeaderError("bad checksum") - - obj = cls() - obj.buf = buf - obj.name = nts(buf[0:100]) - obj.mode = nti(buf[100:108]) - obj.uid = nti(buf[108:116]) - obj.gid = nti(buf[116:124]) - obj.size = nti(buf[124:136]) - obj.mtime = nti(buf[136:148]) - obj.chksum = chksum - obj.type = buf[156:157] - obj.linkname = nts(buf[157:257]) - obj.uname = nts(buf[265:297]) - obj.gname = nts(buf[297:329]) - obj.devmajor = nti(buf[329:337]) - obj.devminor = nti(buf[337:345]) - prefix = nts(buf[345:500]) - - # Old V7 tar format represents a directory as a regular - # file with a trailing slash. - if obj.type == AREGTYPE and obj.name.endswith("/"): - obj.type = DIRTYPE - - # Remove redundant slashes from directories. - if obj.isdir(): - obj.name = obj.name.rstrip("/") - - # Reconstruct a ustar longname. - if prefix and obj.type not in GNU_TYPES: - obj.name = prefix + "/" + obj.name - return obj - - @classmethod - def fromtarfile(cls, tarfile): - """ - Return the next TarInfo object from TarFile object tarfile. - """ - buf = tarfile.fileobj.read(BLOCKSIZE) - obj = cls.frombuf(buf) - obj.offset = tarfile.fileobj.tell() - BLOCKSIZE - return obj._proc_member(tarfile) - - # -------------------------------------------------------------------------- - # The following are methods that are called depending on the type of a - # member. The entry point is _proc_member() which can be overridden in a - # subclass to add custom _proc_*() methods. A _proc_*() method MUST - # implement the following - # operations: - # 1. Set self.offset_data to the position where the data blocks begin, - # if there is data that follows. - # 2. Set tarfile.offset to the position where the next member's header will - # begin. - # 3. Return self or another valid TarInfo object. - def _proc_member(self, tarfile): - """ - Choose the right processing method depending on the type and call it. - """ - if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): - return self._proc_gnulong(tarfile) - elif self.type == GNUTYPE_SPARSE: - return self._proc_sparse(tarfile) - elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): - return self._proc_pax(tarfile) - else: - return self._proc_builtin(tarfile) - - def _proc_builtin(self, tarfile): - """ - Process a builtin type or an unknown type which will be treated as a regular file. - """ - self.offset_data = tarfile.fileobj.tell() - offset = self.offset_data - if self.isreg() or self.type not in SUPPORTED_TYPES: - # Skip the following data blocks. - offset += self._block(self.size) - tarfile.offset = offset - - # Patch the TarInfo object with saved global - # header information. - self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) - - return self - - def _proc_gnulong(self, tarfile): - """ - Process the blocks that hold a GNU longname or longlink member. - """ - buf = tarfile.fileobj.read(self._block(self.size)) - - # Fetch the next header and process it. 
- try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - # Patch the TarInfo object from the next header with - # the longname information. - next.offset = self.offset - if self.type == GNUTYPE_LONGNAME: - next.name = nts(buf) - elif self.type == GNUTYPE_LONGLINK: - next.linkname = nts(buf) - - return next - - def _proc_sparse(self, tarfile): - """ - Process a GNU sparse header plus extra headers. - """ - buf = self.buf - sp = _ringbuffer() - pos = 386 - lastpos = 0 - realpos = 0 - # There are 4 possible sparse structs in the - # first header. - for i in range(4): - try: - offset = nti(buf[pos : pos + 12]) - numbytes = nti(buf[pos + 12 : pos + 24]) - except ValueError: - break - if offset > lastpos: - sp.append(_hole(lastpos, offset - lastpos)) - sp.append(_data(offset, numbytes, realpos)) - realpos += numbytes - lastpos = offset + numbytes - pos += 24 - - isextended = ord(buf[482]) - origsize = nti(buf[483:495]) - - # If the isextended flag is given, - # there are extra headers to process. - while isextended == 1: - buf = tarfile.fileobj.read(BLOCKSIZE) - pos = 0 - for i in range(21): - try: - offset = nti(buf[pos : pos + 12]) - numbytes = nti(buf[pos + 12 : pos + 24]) - except ValueError: - break - if offset > lastpos: - sp.append(_hole(lastpos, offset - lastpos)) - sp.append(_data(offset, numbytes, realpos)) - realpos += numbytes - lastpos = offset + numbytes - pos += 24 - isextended = ord(buf[504]) - - if lastpos < origsize: - sp.append(_hole(lastpos, origsize - lastpos)) - - self.sparse = sp - - self.offset_data = tarfile.fileobj.tell() - tarfile.offset = self.offset_data + self._block(self.size) - self.size = origsize - - return self - - def _proc_pax(self, tarfile): - """ - Process an extended or global header as described in POSIX.1-2001. - """ - # Read the header information. - buf = tarfile.fileobj.read(self._block(self.size)) - - # A pax header stores supplemental information for either - # the following file (extended) or all following files - # (global). - if self.type == XGLTYPE: - pax_headers = tarfile.pax_headers - else: - pax_headers = tarfile.pax_headers.copy() - - # Parse pax header information. A record looks like that: - # "%d %s=%s\n" % (length, keyword, value). length is the size - # of the complete record including the length field itself and - # the newline. keyword and value are both UTF-8 encoded strings. - regex = re.compile(r"(\d+) ([^=]+)=", re.U) - pos = 0 - while True: - match = regex.match(buf, pos) - if not match: - break - - length, keyword = match.groups() - length = int(length) - value = buf[match.end(2) + 1 : match.start(1) + length - 1] - - try: - keyword = keyword.decode("utf8") - except UnicodeDecodeError: - pass - - try: - value = value.decode("utf8") - except UnicodeDecodeError: - pass - - pax_headers[keyword] = value - pos += length - - # Fetch the next header. - try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - if self.type in (XHDTYPE, SOLARIS_XHDTYPE): - # Patch the TarInfo object with the extended header info. - next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) - next.offset = self.offset - - if "size" in pax_headers: - # If the extended header replaces the size field, - # we need to recalculate the offset where the next - # header starts. 
- offset = next.offset_data - if next.isreg() or next.type not in SUPPORTED_TYPES: - offset += next._block(next.size) - tarfile.offset = offset - - return next - - def _apply_pax_info(self, pax_headers, encoding, errors): - """ - Replace fields with supplemental information from a previous pax extended or global header. - """ - for keyword, value in pax_headers.items(): - if keyword not in PAX_FIELDS: - continue - - if keyword == "path": - value = value.rstrip("/") - - if keyword in PAX_NUMBER_FIELDS: - try: - value = PAX_NUMBER_FIELDS[keyword](value) - except ValueError: - value = 0 - else: - value = uts(value, encoding, errors) - - setattr(self, keyword, value) - - self.pax_headers = pax_headers.copy() - - def _block(self, count): - """ - Round up a byte count by BLOCKSIZE and return it, e.g. _block(834) => 1024. - """ - blocks, remainder = divmod(count, BLOCKSIZE) - if remainder: - blocks += 1 - return blocks * BLOCKSIZE - - def isreg(self): - return self.type in REGULAR_TYPES - - def isfile(self): - return self.isreg() - - def isdir(self): - return self.type == DIRTYPE - - def issym(self): - return self.type == SYMTYPE - - def islnk(self): - return self.type == LNKTYPE - - def ischr(self): - return self.type == CHRTYPE - - def isblk(self): - return self.type == BLKTYPE - - def isfifo(self): - return self.type == FIFOTYPE - - def issparse(self): - return self.type == GNUTYPE_SPARSE - - def isdev(self): - return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) - - -# class TarInfo - - -class TarFile(object): - """ - The TarFile Class provides an interface to tar archives. - """ - - debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) - - dereference = False # If true, add content of linked file to the - # tar file, else the link. - - ignore_zeros = False # If true, skips empty or invalid blocks and - # continues processing. - - errorlevel = 1 # If 0, fatal errors only appear in debug - # messages (if debug >= 0). If > 0, errors - # are passed to the caller as exceptions. - - format = DEFAULT_FORMAT # The format to use when creating an archive. - - encoding = ENCODING # Encoding for 8-bit character strings. - - errors = None # Error handler for unicode conversion. - - tarinfo = TarInfo # The default TarInfo class to use. - - fileobject = ExFileObject # The default ExFileObject class to use. - - def __init__( - self, - name=None, - mode="r", - fileobj=None, - format=None, - tarinfo=None, - dereference=None, - ignore_zeros=None, - encoding=None, - errors=None, - pax_headers=None, - debug=None, - errorlevel=None, - ): - """ - Open an (uncompressed) tar archive `name'. - - `mode' is either 'r' to read from an existing archive, 'a' to append data to an existing - file or 'w' to create a new file overwriting an existing one. `mode' defaults to 'r'. If - `fileobj' is given, it is used for reading or writing data. If it can be determined, `mode' - is overridden by `fileobj's mode. `fileobj' is not closed, when TarFile is closed. - """ - modes = {"r": "rb", "a": "r+b", "w": "wb"} - if mode not in modes: - raise ValueError("mode must be 'r', 'a' or 'w'") - self.mode = mode - self._mode = modes[mode] - - if not fileobj: - if self.mode == "a" and not os.path.exists(name): - # Create nonexistent files in append mode. 
- self.mode = "w" - self._mode = "wb" - fileobj = bltn_open(name, self._mode) - self._extfileobj = False - else: - if name is None and hasattr(fileobj, "name"): - name = fileobj.name - if hasattr(fileobj, "mode"): - self._mode = fileobj.mode - self._extfileobj = True - self.name = os.path.abspath(name) if name else None - self.fileobj = fileobj - - # Init attributes. - if format is not None: - self.format = format - if tarinfo is not None: - self.tarinfo = tarinfo - if dereference is not None: - self.dereference = dereference - if ignore_zeros is not None: - self.ignore_zeros = ignore_zeros - if encoding is not None: - self.encoding = encoding - - if errors is not None: - self.errors = errors - elif mode == "r": - self.errors = "utf-8" - else: - self.errors = "strict" - - if pax_headers is not None and self.format == PAX_FORMAT: - self.pax_headers = pax_headers - else: - self.pax_headers = {} - - if debug is not None: - self.debug = debug - if errorlevel is not None: - self.errorlevel = errorlevel - - # Init datastructures. - self.closed = False - self.members = [] # list of members as TarInfo objects - self._loaded = False # flag if all members have been read - self.offset = self.fileobj.tell() - # current position in the archive file - self.inodes = {} # dictionary caching the inodes of - # archive members already added - - try: - if self.mode == "r": - self.firstmember = None - self.firstmember = next(self) - - if self.mode == "a": - # Move to the end of the archive, - # before the first empty block. - while True: - self.fileobj.seek(self.offset) - try: - tarinfo = self.tarinfo.fromtarfile(self) - self.members.append(tarinfo) - except EOFHeaderError: - self.fileobj.seek(self.offset) - break - except HeaderError as e: - raise ReadError(str(e)) - - if self.mode in "aw": - self._loaded = True - - if self.pax_headers: - buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) - self.fileobj.write(buf) - self.offset += len(buf) - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - def _getposix(self): - return self.format == USTAR_FORMAT - - def _setposix(self, value): - import warnings - - warnings.warn("use the format attribute instead", DeprecationWarning, 2) - if value: - self.format = USTAR_FORMAT - else: - self.format = GNU_FORMAT - - posix = property(_getposix, _setposix) - - # -------------------------------------------------------------------------- - # Below are the classmethods which act as alternate constructors to the - # TarFile class. The open() method is the only one that is needed for - # public use; it is the "super"-constructor and is able to select an - # adequate "sub"-constructor for a particular compression using the mapping - # from OPEN_METH. - # - # This concept allows one to subclass TarFile without losing the comfort of - # the super-constructor. A sub-constructor is registered and made available - # by adding it to the mapping in OPEN_METH. - - @classmethod - def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): - """ - Open a tar archive for reading, writing or appending. Return an appropriate TarFile class. 
- - mode: - 'r' or 'r:*' open for reading with transparent compression - 'r:' open for reading exclusively uncompressed - 'r:gz' open for reading with gzip compression - 'r:bz2' open for reading with bzip2 compression - 'a' or 'a:' open for appending, creating the file if necessary - 'w' or 'w:' open for writing without compression - 'w:gz' open for writing with gzip compression - 'w:bz2' open for writing with bzip2 compression - - 'r|*' open a stream of tar blocks with transparent compression - 'r|' open an uncompressed stream of tar blocks for reading - 'r|gz' open a gzip compressed stream of tar blocks - 'r|bz2' open a bzip2 compressed stream of tar blocks - 'w|' open an uncompressed stream for writing - 'w|gz' open a gzip compressed stream for writing - 'w|bz2' open a bzip2 compressed stream for writing - """ - - if not name and not fileobj: - raise ValueError("nothing to open") - - if mode in ("r", "r:*"): - # Find out which *open() is appropriate for opening the file. - for comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - if fileobj is not None: - saved_pos = fileobj.tell() - try: - return func(name, "r", fileobj, **kwargs) - except (ReadError, CompressionError) as e: - if fileobj is not None: - fileobj.seek(saved_pos) - continue - raise ReadError("file could not be opened successfully") - - elif ":" in mode: - filemode, comptype = mode.split(":", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - # Select the *open() function according to - # given compression. - if comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - else: - raise CompressionError("unknown compression type %r" % comptype) - return func(name, filemode, fileobj, **kwargs) - - elif "|" in mode: - filemode, comptype = mode.split("|", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - if filemode not in ("r", "w"): - raise ValueError("mode must be 'r' or 'w'") - - stream = _Stream(name, filemode, comptype, fileobj, bufsize) - try: - t = cls(name, filemode, stream, **kwargs) - except: - stream.close() - raise - t._extfileobj = False - return t - - elif mode in ("a", "w"): - return cls.taropen(name, mode, fileobj, **kwargs) - - raise ValueError("undiscernible mode") - - @classmethod - def taropen(cls, name, mode="r", fileobj=None, **kwargs): - """ - Open uncompressed tar archive name for reading or writing. - """ - if mode not in ("r", "a", "w"): - raise ValueError("mode must be 'r', 'a' or 'w'") - return cls(name, mode, fileobj, **kwargs) - - @classmethod - def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """ - Open gzip compressed tar archive name for reading or writing. - - Appending is not allowed. - """ - if mode not in ("r", "w"): - raise ValueError("mode must be 'r' or 'w'") - - try: - import gzip - - gzip.GzipFile - except (ImportError, AttributeError): - raise CompressionError("gzip module is not available") - - try: - fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj) - except OSError: - if fileobj is not None and mode == "r": - raise ReadError("not a gzip file") - raise - - try: - t = cls.taropen(name, mode, fileobj, **kwargs) - except IOError: - fileobj.close() - if mode == "r": - raise ReadError("not a gzip file") - raise - except: - fileobj.close() - raise - t._extfileobj = False - return t - - @classmethod - def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """ - Open bzip2 compressed tar archive name for reading or writing. - - Appending is not allowed. 
- """ - if mode not in ("r", "w"): - raise ValueError("mode must be 'r' or 'w'.") - - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - - if fileobj is not None: - fileobj = _BZ2Proxy(fileobj, mode) - else: - fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) - - try: - t = cls.taropen(name, mode, fileobj, **kwargs) - except (IOError, EOFError): - fileobj.close() - if mode == "r": - raise ReadError("not a bzip2 file") - raise - except: - fileobj.close() - raise - t._extfileobj = False - return t - - # All *open() methods are registered here. - OPEN_METH = { - "tar": "taropen", # uncompressed tar - "gz": "gzopen", # gzip compressed tar - "bz2": "bz2open", # bzip2 compressed tar - } - - # -------------------------------------------------------------------------- - # The public methods which TarFile provides: - - def close(self): - """ - Close the TarFile. - - In write-mode, two finishing zero blocks are appended to the archive. - """ - if self.closed: - return - - self.closed = True - try: - if self.mode in "aw": - self.fileobj.write(NUL * (BLOCKSIZE * 2)) - self.offset += BLOCKSIZE * 2 - # fill up the end with zero-blocks - # (like option -b20 for tar does) - blocks, remainder = divmod(self.offset, RECORDSIZE) - if remainder > 0: - self.fileobj.write(NUL * (RECORDSIZE - remainder)) - finally: - if not self._extfileobj: - self.fileobj.close() - - def getmember(self, name): - """ - Return a TarInfo object for member `name'. - - If `name' can not be found in the archive, KeyError is raised. If a member occurs more than - once in the archive, its last occurrence is assumed to be the most up-to-date version. - """ - tarinfo = self._getmember(name) - if tarinfo is None: - raise KeyError("filename %r not found" % name) - return tarinfo - - def getmembers(self): - """ - Return the members of the archive as a list of TarInfo objects. - - The list has the same order as the members in the archive. - """ - self._check() - if not self._loaded: # if we want to obtain a list of - self._load() # all members, we first have to - # scan the whole archive. - return self.members - - def getnames(self): - """ - Return the members of the archive as a list of their names. - - It has the same order as the list returned by getmembers(). - """ - return [tarinfo.name for tarinfo in self.getmembers()] - - def gettarinfo(self, name=None, arcname=None, fileobj=None): - """ - Create a TarInfo object for either the file `name' or the file object `fileobj' (using - os.fstat on its file descriptor). - - You can modify some of the TarInfo's attributes before you add it using addfile(). If given, - `arcname' specifies an alternative name for the file in the archive. - """ - self._check("aw") - - # When fileobj is given, replace name by - # fileobj's real name. - if fileobj is not None: - name = fileobj.name - - # Building the name of the member in the archive. - # Backward slashes are converted to forward slashes, - # Absolute paths are turned to relative paths. - if arcname is None: - arcname = name - drv, arcname = os.path.splitdrive(arcname) - arcname = arcname.replace(os.sep, "/") - arcname = arcname.lstrip("/") - - # Now, fill the TarInfo object with - # information specific for the file. - tarinfo = self.tarinfo() - tarinfo.tarfile = self - - # Use os.stat or os.lstat, depending on platform - # and if symlinks shall be resolved. 
- if fileobj is None: - if hasattr(os, "lstat") and not self.dereference: - statres = os.lstat(name) - else: - statres = os.stat(name) - else: - statres = os.fstat(fileobj.fileno()) - linkname = "" - - stmd = statres.st_mode - if stat.S_ISREG(stmd): - inode = (statres.st_ino, statres.st_dev) - if ( - not self.dereference - and statres.st_nlink > 1 - and inode in self.inodes - and arcname != self.inodes[inode] - ): - # Is it a hardlink to an already - # archived file? - type = LNKTYPE - linkname = self.inodes[inode] - else: - # The inode is added only if its valid. - # For win32 it is always 0. - type = REGTYPE - if inode[0]: - self.inodes[inode] = arcname - elif stat.S_ISDIR(stmd): - type = DIRTYPE - elif stat.S_ISFIFO(stmd): - type = FIFOTYPE - elif stat.S_ISLNK(stmd): - type = SYMTYPE - linkname = os.readlink(name) - elif stat.S_ISCHR(stmd): - type = CHRTYPE - elif stat.S_ISBLK(stmd): - type = BLKTYPE - else: - return None - - # Fill the TarInfo object with all - # information we can get. - tarinfo.name = arcname - tarinfo.mode = stmd - tarinfo.uid = statres.st_uid - tarinfo.gid = statres.st_gid - if type == REGTYPE: - tarinfo.size = statres.st_size - else: - tarinfo.size = 0 - tarinfo.mtime = statres.st_mtime - tarinfo.type = type - tarinfo.linkname = linkname - if pwd: - try: - tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] - except KeyError: - pass - if grp: - try: - tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] - except KeyError: - pass - - if type in (CHRTYPE, BLKTYPE): - if hasattr(os, "major") and hasattr(os, "minor"): - tarinfo.devmajor = os.major(statres.st_rdev) - tarinfo.devminor = os.minor(statres.st_rdev) - return tarinfo - - def list(self, verbose=True): - """ - Print a table of contents to sys.stdout. - - If `verbose' is False, only the names of the members are printed. If it is True, an `ls - -l'-like output is produced. - """ - self._check() - - for tarinfo in self: - if verbose: - print(filemode(tarinfo.mode), end=" ") - print( - "%s/%s" % (tarinfo.uname or tarinfo.uid, tarinfo.gname or tarinfo.gid), end=" " - ) - if tarinfo.ischr() or tarinfo.isblk(): - print("%10s" % ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)), end=" ") - else: - print("%10d" % tarinfo.size, end=" ") - print("%d-%02d-%02d %02d:%02d:%02d" % time.localtime(tarinfo.mtime)[:6], end=" ") - - print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=" ") - - if verbose: - if tarinfo.issym(): - print("->", tarinfo.linkname, end=" ") - if tarinfo.islnk(): - print("link to", tarinfo.linkname, end=" ") - print() - - def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): - """ - Add the file `name' to the archive. - - `name' may be any type of file (directory, fifo, symbolic link, etc.). If given, `arcname' - specifies an alternative name for the file in the archive. Directories are added recursively - by default. This can be avoided by setting `recursive' to False. `exclude' is a function - that should return True for each filename to be excluded. `filter' is a function that - expects a TarInfo object argument and returns the changed TarInfo object, if it returns None - the TarInfo object will be excluded from the archive. - """ - self._check("aw") - - if arcname is None: - arcname = name - - # Exclude pathnames. - if exclude is not None: - import warnings - - warnings.warn("use the filter argument instead", DeprecationWarning, 2) - if exclude(name): - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Skip if somebody tries to archive the archive... 
- if self.name is not None and os.path.abspath(name) == self.name: - self._dbg(2, "tarfile: Skipped %r" % name) - return - - self._dbg(1, name) - - # Create a TarInfo object from the file. - tarinfo = self.gettarinfo(name, arcname) - - if tarinfo is None: - self._dbg(1, "tarfile: Unsupported type %r" % name) - return - - # Change or exclude the TarInfo object. - if filter is not None: - tarinfo = list(filter(tarinfo)) - if tarinfo is None: - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Append the tar header and data to the archive. - if tarinfo.isreg(): - with bltn_open(name, "rb") as f: - self.addfile(tarinfo, f) - - elif tarinfo.isdir(): - self.addfile(tarinfo) - if recursive: - for f in os.listdir(name): - self.add( - os.path.join(name, f), os.path.join(arcname, f), recursive, exclude, filter - ) - - else: - self.addfile(tarinfo) - - def addfile(self, tarinfo, fileobj=None): - """ - Add the TarInfo object `tarinfo' to the archive. - - If `fileobj' is given, tarinfo.size bytes are read from it and added to the archive. You can - create TarInfo objects using gettarinfo(). On Windows platforms, `fileobj' should always be - opened with mode 'rb' to avoid irritation about the file size. - """ - self._check("aw") - - tarinfo = copy.copy(tarinfo) - - buf = tarinfo.tobuf(self.format, self.encoding, self.errors) - self.fileobj.write(buf) - self.offset += len(buf) - - # If there's data to follow, append it. - if fileobj is not None: - copyfileobj(fileobj, self.fileobj, tarinfo.size) - blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) - if remainder > 0: - self.fileobj.write(NUL * (BLOCKSIZE - remainder)) - blocks += 1 - self.offset += blocks * BLOCKSIZE - - self.members.append(tarinfo) - - def extractall(self, path=".", members=None): - """ - Extract all members from the archive to the current working directory and set owner, - modification time and permissions on directories afterwards. - - `path' specifies a different directory to extract to. `members' is optional and must be a - subset of the list returned by getmembers(). - """ - directories = [] - - if members is None: - members = self - - for tarinfo in members: - if tarinfo.isdir(): - # Extract directories with a safe mode. - directories.append(tarinfo) - tarinfo = copy.copy(tarinfo) - tarinfo.mode = 0o700 - self.extract(tarinfo, path) - - # Reverse sort directories. - directories.sort(key=operator.attrgetter("name")) - directories.reverse() - - # Set correct owner, mtime and filemode on directories. - for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) - try: - self.chown(tarinfo, dirpath) - self.utime(tarinfo, dirpath) - self.chmod(tarinfo, dirpath) - except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extract(self, member, path=""): - """ - Extract a member from the archive to the current working directory, using its full name. - - Its file information is extracted as accurately as possible. `member' may be a filename or a - TarInfo object. You can specify a different directory using `path'. - """ - self._check("r") - - if isinstance(member, str): - tarinfo = self.getmember(member) - else: - tarinfo = member - - # Prepare the link target for makelink(). 
- if tarinfo.islnk(): - tarinfo._link_target = os.path.join(path, tarinfo.linkname) - - try: - self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) - except EnvironmentError as e: - if self.errorlevel > 0: - raise - else: - if e.filename is None: - self._dbg(1, "tarfile: %s" % e.strerror) - else: - self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) - except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extractfile(self, member): - """ - Extract a member from the archive as a file object. - - `member' may be - a filename or a TarInfo object. If `member' is a regular file, a - file-like object is returned. If `member' is a link, a file-like - object is constructed from the link's target. If `member' is none of - the above, None is returned. - The file-like object is read-only and provides the following - methods: read(), readline(), readlines(), seek() and tell() - """ - self._check("r") - - if isinstance(member, str): - tarinfo = self.getmember(member) - else: - tarinfo = member - - if tarinfo.isreg(): - return self.fileobject(self, tarinfo) - - elif tarinfo.type not in SUPPORTED_TYPES: - # If a member's type is unknown, it is treated as a - # regular file. - return self.fileobject(self, tarinfo) - - elif tarinfo.islnk() or tarinfo.issym(): - if isinstance(self.fileobj, _Stream): - # A small but ugly workaround for the case that someone tries - # to extract a (sym)link as a file-object from a non-seekable - # stream of tar blocks. - raise StreamError("cannot extract (sym)link as file object") - else: - # A (sym)link's file object is its target's file object. - return self.extractfile(self._find_link_target(tarinfo)) - else: - # If there's no data associated with the member (directory, chrdev, - # blkdev, etc.), return None instead of a file object. - return None - - def _extract_member(self, tarinfo, targetpath): - """ - Extract the TarInfo object tarinfo to a physical file called targetpath. - """ - # Fetch the TarInfo object for the given name - # and build the destination pathname, replacing - # forward slashes to platform specific separators. - targetpath = targetpath.rstrip("/") - targetpath = targetpath.replace("/", os.sep) - - # Create all upper directories. - upperdirs = os.path.dirname(targetpath) - if upperdirs and not os.path.exists(upperdirs): - # Create directories that are not part of the archive with - # default permissions. - os.makedirs(upperdirs) - - if tarinfo.islnk() or tarinfo.issym(): - self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) - else: - self._dbg(1, tarinfo.name) - - if tarinfo.isreg(): - self.makefile(tarinfo, targetpath) - elif tarinfo.isdir(): - self.makedir(tarinfo, targetpath) - elif tarinfo.isfifo(): - self.makefifo(tarinfo, targetpath) - elif tarinfo.ischr() or tarinfo.isblk(): - self.makedev(tarinfo, targetpath) - elif tarinfo.islnk() or tarinfo.issym(): - self.makelink(tarinfo, targetpath) - elif tarinfo.type not in SUPPORTED_TYPES: - self.makeunknown(tarinfo, targetpath) - else: - self.makefile(tarinfo, targetpath) - - self.chown(tarinfo, targetpath) - if not tarinfo.issym(): - self.chmod(tarinfo, targetpath) - self.utime(tarinfo, targetpath) - - # -------------------------------------------------------------------------- - # Below are the different file methods. They are called via - # _extract_member() when extract() is called. They can be replaced in a - # subclass to implement other functionality. 
- - def makedir(self, tarinfo, targetpath): - """ - Make a directory called targetpath. - """ - try: - # Use a safe mode for the directory, the real mode is set - # later in _extract_member(). - os.mkdir(targetpath, 0o700) - except EnvironmentError as e: - if e.errno != errno.EEXIST: - raise - - def makefile(self, tarinfo, targetpath): - """ - Make a file called targetpath. - """ - source = self.extractfile(tarinfo) - try: - with bltn_open(targetpath, "wb") as target: - copyfileobj(source, target) - finally: - source.close() - - def makeunknown(self, tarinfo, targetpath): - """ - Make a file from a TarInfo object with an unknown type at targetpath. - """ - self.makefile(tarinfo, targetpath) - self._dbg(1, "tarfile: Unknown file type %r, " "extracted as regular file." % tarinfo.type) - - def makefifo(self, tarinfo, targetpath): - """ - Make a fifo called targetpath. - """ - if hasattr(os, "mkfifo"): - os.mkfifo(targetpath) - else: - raise ExtractError("fifo not supported by system") - - def makedev(self, tarinfo, targetpath): - """ - Make a character or block device called targetpath. - """ - if not hasattr(os, "mknod") or not hasattr(os, "makedev"): - raise ExtractError("special devices not supported by system") - - mode = tarinfo.mode - if tarinfo.isblk(): - mode |= stat.S_IFBLK - else: - mode |= stat.S_IFCHR - - os.mknod(targetpath, mode, os.makedev(tarinfo.devmajor, tarinfo.devminor)) - - def makelink(self, tarinfo, targetpath): - """ - Make a (symbolic) link called targetpath. - - If it cannot be created (platform limitation), we try to make a copy of the referenced file - instead of a link. - """ - if hasattr(os, "symlink") and hasattr(os, "link"): - # For systems that support symbolic and hard links. - if tarinfo.issym(): - if os.path.lexists(targetpath): - os.unlink(targetpath) - os.symlink(tarinfo.linkname, targetpath) - else: - # See extract(). - if os.path.exists(tarinfo._link_target): - if os.path.lexists(targetpath): - os.unlink(targetpath) - os.link(tarinfo._link_target, targetpath) - else: - self._extract_member(self._find_link_target(tarinfo), targetpath) - else: - try: - self._extract_member(self._find_link_target(tarinfo), targetpath) - except KeyError: - raise ExtractError("unable to resolve link inside archive") - - def chown(self, tarinfo, targetpath): - """ - Set owner of targetpath according to tarinfo. - """ - if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: - # We have to be root to do so. - try: - g = grp.getgrnam(tarinfo.gname)[2] - except KeyError: - g = tarinfo.gid - try: - u = pwd.getpwnam(tarinfo.uname)[2] - except KeyError: - u = tarinfo.uid - try: - if tarinfo.issym() and hasattr(os, "lchown"): - os.lchown(targetpath, u, g) - else: - if sys.platform != "os2emx": - os.chown(targetpath, u, g) - except EnvironmentError as e: - raise ExtractError("could not change owner") - - def chmod(self, tarinfo, targetpath): - """ - Set file permissions of targetpath according to tarinfo. - """ - if hasattr(os, "chmod"): - try: - os.chmod(targetpath, tarinfo.mode) - except EnvironmentError as e: - raise ExtractError("could not change mode") - - def utime(self, tarinfo, targetpath): - """ - Set modification time of targetpath according to tarinfo. 
- """ - if not hasattr(os, "utime"): - return - try: - os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) - except EnvironmentError as e: - raise ExtractError("could not change modification time") - - # -------------------------------------------------------------------------- - def __next__(self): - """ - Return the next member of the archive as a TarInfo object, when TarFile is opened for - reading. - - Return None if there is no more available. - """ - self._check("ra") - if self.firstmember is not None: - m = self.firstmember - self.firstmember = None - return m - - # Advance the file pointer. - if self.offset != self.fileobj.tell(): - self.fileobj.seek(self.offset - 1) - if not self.fileobj.read(1): - raise ReadError("unexpected end of data") - - # Read the next block. - tarinfo = None - while True: - try: - tarinfo = self.tarinfo.fromtarfile(self) - except EOFHeaderError as e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - except InvalidHeaderError as e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - elif self.offset == 0: - raise ReadError(str(e)) - except EmptyHeaderError: - if self.offset == 0: - raise ReadError("empty file") - except TruncatedHeaderError as e: - if self.offset == 0: - raise ReadError(str(e)) - except SubsequentHeaderError as e: - raise ReadError(str(e)) - break - - if tarinfo is not None: - self.members.append(tarinfo) - else: - self._loaded = True - - return tarinfo - - # -------------------------------------------------------------------------- - # Little helper methods: - - def _getmember(self, name, tarinfo=None, normalize=False): - """ - Find an archive member by name from bottom to top. - - If tarinfo is given, it is used as the starting point. - """ - # Ensure that all members have been loaded. - members = self.getmembers() - - # Limit the member search list up to tarinfo. - if tarinfo is not None: - members = members[: members.index(tarinfo)] - - if normalize: - name = os.path.normpath(name) - - for member in reversed(members): - if normalize: - member_name = os.path.normpath(member.name) - else: - member_name = member.name - - if name == member_name: - return member - - def _load(self): - """ - Read through the entire archive file and look for readable members. - """ - while True: - tarinfo = next(self) - if tarinfo is None: - break - self._loaded = True - - def _check(self, mode=None): - """ - Check if TarFile is still open, and if the operation's mode corresponds to TarFile's mode. - """ - if self.closed: - raise IOError("%s is closed" % self.__class__.__name__) - if mode is not None and self.mode not in mode: - raise IOError("bad operation for mode %r" % self.mode) - - def _find_link_target(self, tarinfo): - """ - Find the target member of a symlink or hardlink member in the archive. - """ - if tarinfo.issym(): - # Always search the entire archive. - linkname = "/".join( - [_f for _f in (os.path.dirname(tarinfo.name), tarinfo.linkname) if _f] - ) - limit = None - else: - # Search the archive before the link, because a hard link is - # just a reference to an already archived file. - linkname = tarinfo.linkname - limit = tarinfo - - member = self._getmember(linkname, tarinfo=limit, normalize=True) - if member is None: - raise KeyError("linkname %r not found" % linkname) - return member - - def __iter__(self): - """ - Provide an iterator object. 
- """ - if self._loaded: - return iter(self.members) - else: - return TarIter(self) - - def _dbg(self, level, msg): - """ - Write debugging output to sys.stderr. - """ - if level <= self.debug: - print(msg, file=sys.stderr) - - def __enter__(self): - self._check() - return self - - def __exit__(self, type, value, traceback): - if type is None: - self.close() - else: - # An exception occurred. We must not call close() because - # it would try to write end-of-archive blocks and padding. - if not self._extfileobj: - self.fileobj.close() - self.closed = True - - -# class TarFile - - -class TarIter: - """ - Iterator Class. - - for tarinfo in TarFile(...): suite... - """ - - def __init__(self, tarfile): - """ - Construct a TarIter object. - """ - self.tarfile = tarfile - self.index = 0 - - def __iter__(self): - """ - Return iterator object. - """ - return self - - def __next__(self): - """ - Return the next item using TarFile's next() method. - - When all members have been read, set TarFile as _loaded. - """ - # Fix for SF #1100429: Under rare circumstances it can - # happen that getmembers() is called during iteration, - # which will cause TarIter to stop prematurely. - - if self.index == 0 and self.tarfile.firstmember is not None: - tarinfo = next(self.tarfile) - elif self.index < len(self.tarfile.members): - tarinfo = self.tarfile.members[self.index] - elif not self.tarfile._loaded: - tarinfo = next(self.tarfile) - if not tarinfo: - self.tarfile._loaded = True - raise StopIteration - else: - raise StopIteration - self.index += 1 - return tarinfo - - -# Helper classes for sparse file support -class _section: - """ - Base class for _data and _hole. - """ - - def __init__(self, offset, size): - self.offset = offset - self.size = size - - def __contains__(self, offset): - return self.offset <= offset < self.offset + self.size - - -class _data(_section): - """ - Represent a data section in a sparse file. - """ - - def __init__(self, offset, size, realpos): - _section.__init__(self, offset, size) - self.realpos = realpos - - -class _hole(_section): - """ - Represent a hole section in a sparse file. - """ - - pass - - -class _ringbuffer(list): - """ - Ringbuffer class which increases performance over a regular list. - """ - - def __init__(self): - self.idx = 0 - - def find(self, offset): - idx = self.idx - while True: - item = self[idx] - if offset in item: - break - idx += 1 - if idx == len(self): - idx = 0 - if idx == self.idx: - # End of File - return None - self.idx = idx - return item - - -# --------------------------------------------- -# zipfile compatible TarFile class -# --------------------------------------------- -TAR_PLAIN = 0 # zipfile.ZIP_STORED -TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED - - -class TarFileCompat: - """ - TarFile class compatible with standard module zipfile's ZipFile class. 
- """ - - def __init__(self, file, mode="r", compression=TAR_PLAIN): - from warnings import warnpy3k - - warnpy3k("the TarFileCompat class has been removed in Python 3.0", stacklevel=2) - if compression == TAR_PLAIN: - self.tarfile = TarFile.taropen(file, mode) - elif compression == TAR_GZIPPED: - self.tarfile = TarFile.gzopen(file, mode) - else: - raise ValueError("unknown compression constant") - if mode[0:1] == "r": - members = self.tarfile.getmembers() - for m in members: - m.filename = m.name - m.file_size = m.size - m.date_time = time.gmtime(m.mtime)[:6] - - def namelist(self): - return [m.name for m in self.infolist()] - - def infolist(self): - return [m for m in self.tarfile.getmembers() if m.type in REGULAR_TYPES] - - def printdir(self): - self.tarfile.list() - - def testzip(self): - return - - def getinfo(self, name): - return self.tarfile.getmember(name) - - def read(self, name): - return self.tarfile.extractfile(self.tarfile.getmember(name)).read() - - def write(self, filename, arcname=None, compress_type=None): - self.tarfile.add(filename, arcname) - - def writestr(self, zinfo, bytes): - try: - from io import StringIO - except ImportError: - from io import StringIO - import calendar - - tinfo = TarInfo(zinfo.filename) - tinfo.size = len(bytes) - tinfo.mtime = calendar.timegm(zinfo.date_time) - self.tarfile.addfile(tinfo, StringIO(bytes)) - - def close(self): - self.tarfile.close() - - -# class TarFileCompat - -# -------------------- -# exported functions -# -------------------- -def is_tarfile(name): - """ - Return True if name points to a tar archive that we are able to handle, else return False. - """ - try: - t = open(name) - t.close() - return True - except TarError: - return False - - -open = TarFile.open diff --git a/util/verifybackfill.py b/util/verifybackfill.py deleted file mode 100644 index 04bc4bf98..000000000 --- a/util/verifybackfill.py +++ /dev/null @@ -1,83 +0,0 @@ -import logging -import sys - -from app import app -from data import model -from data.database import ( - RepositoryTag, - Repository, - TagToRepositoryTag, - TagManifest, - ManifestLegacyImage, -) - -logger = logging.getLogger(__name__) - - -def _vs(first, second): - return "%s vs %s" % (first, second) - - -def verify_backfill(namespace_name): - logger.info("Checking namespace %s", namespace_name) - namespace_user = model.user.get_namespace_user(namespace_name) - assert namespace_user - - repo_tags = ( - RepositoryTag.select() - .join(Repository) - .where(Repository.namespace_user == namespace_user) - .where(RepositoryTag.hidden == False) - ) - - repo_tags = list(repo_tags) - logger.info("Found %s tags", len(repo_tags)) - - for index, repo_tag in enumerate(repo_tags): - logger.info( - "Checking tag %s under repository %s (%s/%s)", - repo_tag.name, - repo_tag.repository.name, - index + 1, - len(repo_tags), - ) - - tag = TagToRepositoryTag.get(repository_tag=repo_tag).tag - assert not tag.hidden - assert tag.repository == repo_tag.repository - assert tag.name == repo_tag.name, _vs(tag.name, repo_tag.name) - assert tag.repository == repo_tag.repository, _vs(tag.repository_id, repo_tag.repository_id) - assert tag.reversion == repo_tag.reversion, _vs(tag.reversion, repo_tag.reversion) - - start_check = int(tag.lifetime_start_ms // 1000) == repo_tag.lifetime_start_ts - assert start_check, _vs(tag.lifetime_start_ms, repo_tag.lifetime_start_ts) - if repo_tag.lifetime_end_ts is not None: - end_check = int(tag.lifetime_end_ms // 1000) == repo_tag.lifetime_end_ts - assert end_check, _vs(tag.lifetime_end_ms, 
repo_tag.lifetime_end_ts) - else: - assert tag.lifetime_end_ms is None - - try: - tag_manifest = tag.manifest - repo_tag_manifest = TagManifest.get(tag=repo_tag) - - digest_check = tag_manifest.digest == repo_tag_manifest.digest - assert digest_check, _vs(tag_manifest.digest, repo_tag_manifest.digest) - - bytes_check = tag_manifest.manifest_bytes == repo_tag_manifest.json_data - assert bytes_check, _vs(tag_manifest.manifest_bytes, repo_tag_manifest.json_data) - except TagManifest.DoesNotExist: - logger.info("No tag manifest found for repository tag %s", repo_tag.id) - - mli = ManifestLegacyImage.get(manifest=tag_manifest) - assert mli.repository == repo_tag.repository - - manifest_legacy_image = mli.image - assert manifest_legacy_image == repo_tag.image, _vs( - manifest_legacy_image.id, repo_tag.image_id - ) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - verify_backfill(sys.argv[1]) diff --git a/verbs.py b/verbs.py deleted file mode 100644 index 59c9b4c29..000000000 --- a/verbs.py +++ /dev/null @@ -1,7 +0,0 @@ -# NOTE: We don't gevent patch here because `verbs` uses `sync` workers. - -from app import app as application -from endpoints.verbs import verbs - - -application.register_blueprint(verbs, url_prefix="/c1") diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py new file mode 100644 index 000000000..a576622cd --- /dev/null +++ b/workers/manifestbackfillworker.py @@ -0,0 +1,103 @@ +import logging +import logging.config +import time + +from peewee import fn + +import features + +from app import app +from data.database import Manifest +from image.shared.schemas import parse_manifest_from_bytes, ManifestException +from workers.worker import Worker +from util.migrate.allocator import yield_random_entries +from util.bytes import Bytes +from util.log import logfile_path + + +logger = logging.getLogger(__name__) + +WORKER_FREQUENCY = app.config.get("MANIFEST_BACKFILL_WORKER_FREQUENCY", 60 * 60) + + +class ManifestBackfillWorker(Worker): + """ + Worker which backfills the newly added layers compressed size and config media type + fields onto Manifest.
+ """ + + def __init__(self): + super(ManifestBackfillWorker, self).__init__() + self.add_operation(self._backfill_manifests, WORKER_FREQUENCY) + + def _backfill_manifests(self): + try: + Manifest.select().where(Manifest.layers_compressed_size >> None).get() + except Manifest.DoesNotExist: + logger.debug("Manifest backfill worker has completed; skipping") + return False + + iterator = yield_random_entries( + lambda: Manifest.select().where(Manifest.layers_compressed_size >> None), + Manifest.id, + 250, + Manifest.select(fn.Max(Manifest.id)).scalar(), + 1, + ) + + for manifest_row, abt, _ in iterator: + if manifest_row.layers_compressed_size is not None: + logger.debug("Another worker preempted this worker") + abt.set() + continue + + logger.debug("Setting layers compressed size for manifest %s", manifest_row.id) + layers_compressed_size = -1 + config_media_type = None + manifest_bytes = Bytes.for_string_or_unicode(manifest_row.manifest_bytes) + + try: + parsed = parse_manifest_from_bytes( + manifest_bytes, manifest_row.media_type.name, validate=False + ) + layers_compressed_size = parsed.layers_compressed_size + if layers_compressed_size is None: + layers_compressed_size = 0 + + config_media_type = parsed.config_media_type or None + except ManifestException as me: + logger.warning( + "Got exception when trying to parse manifest %s: %s", manifest_row.id, me + ) + + assert layers_compressed_size is not None + updated = ( + Manifest.update( + layers_compressed_size=layers_compressed_size, + config_media_type=config_media_type, + ) + .where(Manifest.id == manifest_row.id, Manifest.layers_compressed_size >> None) + .execute() + ) + if updated != 1: + logger.debug("Another worker preempted this worker") + abt.set() + continue + + return True + + +def main(): + logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False) + + if not features.MANIFEST_SIZE_BACKFILL: + logger.debug("Manifest backfill worker not enabled; skipping") + while True: + time.sleep(100000) + + worker = ManifestBackfillWorker() + worker.start() + + +if __name__ == "__main__": + main() diff --git a/workers/notificationworker/models_pre_oci.py b/workers/notificationworker/models_pre_oci.py index 15fb912da..1db25e92f 100644 --- a/workers/notificationworker/models_pre_oci.py +++ b/workers/notificationworker/models_pre_oci.py @@ -1,6 +1,7 @@ import json from data import model +from data.database import RepositoryNotification from workers.notificationworker.models_interface import ( NotificationWorkerDataInterface, Notification, @@ -14,8 +15,8 @@ def notification(notification_row): """ return Notification( uuid=notification_row.uuid, - event_name=notification_row.event.name, - method_name=notification_row.method.name, + event_name=RepositoryNotification.event.get_name(notification_row.event_id), + method_name=RepositoryNotification.method.get_name(notification_row.method_id), event_config_dict=json.loads(notification_row.event_config_json or "{}"), method_config_dict=json.loads(notification_row.config_json or "{}"), repository=Repository( diff --git a/workers/repomirrorworker/test/test_repomirrorworker.py b/workers/repomirrorworker/test/test_repomirrorworker.py index 5897101c0..31996a9e9 100644 --- a/workers/repomirrorworker/test/test_repomirrorworker.py +++ b/workers/repomirrorworker/test/test_repomirrorworker.py @@ -56,14 +56,18 @@ def _create_tag(repo, name): ) upload.upload_chunk(app_config, BytesIO(config_json.encode("utf-8"))) blob = upload.commit_to_blob(app_config) + assert blob + builder = 
DockerSchema2ManifestBuilder() builder.set_config_digest(blob.digest, blob.compressed_size) builder.add_layer("sha256:abcd", 1234, urls=["http://hello/world"]) manifest = builder.build() manifest, tag = registry_model.create_manifest_and_retarget_tag( - repo_ref, manifest, name, storage + repo_ref, manifest, name, storage, raise_on_error=True ) + assert tag + assert tag.name == name @disable_existing_mirrors diff --git a/workers/security_notification_worker.py b/workers/security_notification_worker.py deleted file mode 100644 index 27d1b1cb1..000000000 --- a/workers/security_notification_worker.py +++ /dev/null @@ -1,106 +0,0 @@ -import logging -import time -import json - -import features - -from app import secscan_notification_queue -from data.secscan_model import secscan_model -from workers.queueworker import QueueWorker, JobException -from util.secscan.notifier import SecurityNotificationHandler, ProcessNotificationPageResult - - -logger = logging.getLogger(__name__) - - -_PROCESSING_SECONDS = 60 * 60 # 1 hour -_LAYER_LIMIT = 1000 # The number of layers to request on each page. - - -class SecurityNotificationWorker(QueueWorker): - """ NOTE: This worker is legacy code and should be removed after we've fully moved to Clair V4 - API. - """ - - def process_queue_item(self, data): - self.perform_notification_work(data) - - def perform_notification_work(self, data, layer_limit=_LAYER_LIMIT): - """ - Performs the work for handling a security notification as referenced by the given data - object. - - Returns True on successful handling, False on non-retryable failure and raises a - JobException on retryable failure. - """ - secscan_api = secscan_model.legacy_api_handler - - notification_name = data["Name"] - current_page = data.get("page", None) - handler = SecurityNotificationHandler(secscan_api, layer_limit) - - while True: - # Retrieve the current page of notification data from the security scanner. - (response_data, should_retry) = secscan_api.get_notification( - notification_name, layer_limit=layer_limit, page=current_page - ) - - # If no response, something went wrong. - if response_data is None: - if should_retry: - raise JobException() - else: - # Remove the job from the API. - logger.error("Failed to handle security notification %s", notification_name) - secscan_api.mark_notification_read(notification_name) - - # Return to mark the job as "complete", as we'll never be able to finish it. - return False - - # Extend processing on the queue item so it doesn't expire while we're working. - self.extend_processing(_PROCESSING_SECONDS, json.dumps(data)) - - # Process the notification data. - notification_data = response_data["Notification"] - result = handler.process_notification_page_data(notification_data) - - # Possible states after processing: failed to process, finished processing entirely - # or finished processing the page. - if result == ProcessNotificationPageResult.FAILED: - # Something went wrong. - raise JobException - - if result == ProcessNotificationPageResult.FINISHED_PROCESSING: - # Mark the notification as read. - if not secscan_api.mark_notification_read(notification_name): - # Return to mark the job as "complete", as we'll never be able to finish it. - logger.error("Failed to mark notification %s as read", notification_name) - return False - - # Send the generated Quay notifications. - handler.send_notifications() - return True - - if result == ProcessNotificationPageResult.FINISHED_PAGE: - # Continue onto the next page. 
- current_page = notification_data["NextPage"] - continue - - -if __name__ == "__main__": - if ( - not features.SECURITY_SCANNER - or not features.SECURITY_NOTIFICATIONS - or not secscan_model.legacy_api_handler - ): - logger.debug("Security scanner disabled; skipping SecurityNotificationWorker") - while True: - time.sleep(100000) - - worker = SecurityNotificationWorker( - secscan_notification_queue, - poll_period_seconds=30, - reservation_seconds=30, - retry_after_seconds=30, - ) - worker.start() diff --git a/workers/test/test_manifestbackfillworker.py b/workers/test/test_manifestbackfillworker.py new file mode 100644 index 000000000..024ce9e55 --- /dev/null +++ b/workers/test/test_manifestbackfillworker.py @@ -0,0 +1,34 @@ +import pytest + +from data import model, database +from image.shared.schemas import parse_manifest_from_bytes, ManifestException +from workers.manifestbackfillworker import ManifestBackfillWorker +from util.bytes import Bytes +from test.fixtures import * + + +def test_basic(initialized_db): + worker = ManifestBackfillWorker() + + # Try with none to backfill. + assert not worker._backfill_manifests() + + # Delete the sizes on some manifest rows. + database.Manifest.update(layers_compressed_size=None).execute() + + # Try the backfill now. + assert worker._backfill_manifests() + + # Ensure the rows were updated and correct. + for manifest_row in database.Manifest.select(): + assert manifest_row.layers_compressed_size is not None + + manifest_bytes = Bytes.for_string_or_unicode(manifest_row.manifest_bytes) + parsed = parse_manifest_from_bytes( + manifest_bytes, manifest_row.media_type.name, validate=False + ) + layers_compressed_size = parsed.layers_compressed_size or 0 + assert manifest_row.layers_compressed_size == layers_compressed_size + assert manifest_row.config_media_type == parsed.config_media_type + + assert not worker._backfill_manifests()