From 13985566e7dff89bbfe8055118714773fca828f0 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 19:11:27 +0200 Subject: [PATCH 01/16] check_test_cases.py: make 3.6 identical with development To prepare for the move of check_test_cases.py to the version-independent framework repository, make the file in mbedtls-3.6 identical to the file in development. In development, check_test_cases.py now looks for tests under tf-psa-crypto. This is useless but harmless in mbedtls-3.6. Signed-off-by: Gilles Peskine --- tests/scripts/check_test_cases.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/scripts/check_test_cases.py b/tests/scripts/check_test_cases.py index d67e6781b4..6809dd5e77 100755 --- a/tests/scripts/check_test_cases.py +++ b/tests/scripts/check_test_cases.py @@ -16,6 +16,9 @@ import re import subprocess import sys +import scripts_path # pylint: disable=unused-import +from mbedtls_framework import build_tree + class ScriptOutputError(ValueError): """A kind of ValueError that indicates we found the script doesn't list test cases in an expected @@ -130,13 +133,10 @@ option""" @staticmethod def collect_test_directories(): """Get the relative path for the TLS and Crypto test directories.""" - if os.path.isdir('tests'): - tests_dir = 'tests' - elif os.path.isdir('suites'): - tests_dir = '.' - elif os.path.isdir('../suites'): - tests_dir = '..' - directories = [tests_dir] + mbedtls_root = build_tree.guess_mbedtls_root() + directories = [os.path.join(mbedtls_root, 'tests'), + os.path.join(mbedtls_root, 'tf-psa-crypto', 'tests')] + directories = [os.path.relpath(p) for p in directories] return directories def walk_all(self): @@ -149,7 +149,8 @@ option""" for sh_file in ['ssl-opt.sh', 'compat.sh']: sh_file = os.path.join(directory, sh_file) - self.collect_from_script(sh_file) + if os.path.isfile(sh_file): + self.collect_from_script(sh_file) class TestDescriptions(TestDescriptionExplorer): """Collect the available test cases.""" From 31e31523ad16319f3a61fb2a9571fbfcadf57a1f Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 17:23:53 +0200 Subject: [PATCH 02/16] Create a module to split test case collection from checks Signed-off-by: Gilles Peskine --- tests/scripts/check_test_cases.py | 1 + tests/scripts/collect_test_cases.py | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 tests/scripts/collect_test_cases.py diff --git a/tests/scripts/check_test_cases.py b/tests/scripts/check_test_cases.py index 6809dd5e77..a7ad9aebe3 100755 --- a/tests/scripts/check_test_cases.py +++ b/tests/scripts/check_test_cases.py @@ -18,6 +18,7 @@ import sys import scripts_path # pylint: disable=unused-import from mbedtls_framework import build_tree +import collect_test_cases class ScriptOutputError(ValueError): """A kind of ValueError that indicates we found diff --git a/tests/scripts/collect_test_cases.py b/tests/scripts/collect_test_cases.py new file mode 100644 index 0000000000..7e9031c970 --- /dev/null +++ b/tests/scripts/collect_test_cases.py @@ -0,0 +1,4 @@ +"""Discover all the test cases (unit tests and SSL tests).""" + +# Copyright The Mbed TLS Contributors +# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later From eba00974d60642c8440a9f0550aa395894ff1137 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 17:35:52 +0200 Subject: [PATCH 03/16] Split test case collection from checks Move the test case collection code out of check_test_cases.py and into its own module. 
This allows outcome analysis to depend only on the new module and not on check_test_cases.py. Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 4 +- tests/scripts/check_test_cases.py | 164 +--------------------------- tests/scripts/collect_test_cases.py | 163 +++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 162 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 188b68d1d5..87efb1b89c 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -14,7 +14,7 @@ import subprocess import os import typing -import check_test_cases +import collect_test_cases # `ComponentOutcomes` is a named tuple which is defined as: @@ -197,7 +197,7 @@ class CoverageTask(Task): sys.stderr.write(cp.stdout.decode('utf-8')) results.error("Failed \"make generated_files\" in tests. " "Coverage analysis may be incorrect.") - available = check_test_cases.collect_available_test_cases() + available = collect_test_cases.collect_available_test_cases() for suite_case in available: hit = any(suite_case in comp_outcomes.successes or suite_case in comp_outcomes.failures diff --git a/tests/scripts/check_test_cases.py b/tests/scripts/check_test_cases.py index a7ad9aebe3..b00319bad7 100755 --- a/tests/scripts/check_test_cases.py +++ b/tests/scripts/check_test_cases.py @@ -10,170 +10,14 @@ independently of the checks. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later import argparse -import glob -import os import re -import subprocess import sys import scripts_path # pylint: disable=unused-import -from mbedtls_framework import build_tree import collect_test_cases -class ScriptOutputError(ValueError): - """A kind of ValueError that indicates we found - the script doesn't list test cases in an expected - pattern. - """ - @property - def script_name(self): - return super().args[0] - - @property - def idx(self): - return super().args[1] - - @property - def line(self): - return super().args[2] - -class Results: - """Store file and line information about errors or warnings in test suites.""" - - def __init__(self, options): - self.errors = 0 - self.warnings = 0 - self.ignore_warnings = options.quiet - - def error(self, file_name, line_number, fmt, *args): - sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n'). - format(file_name, line_number, *args)) - self.errors += 1 - - def warning(self, file_name, line_number, fmt, *args): - if not self.ignore_warnings: - sys.stderr.write(('{}:{}:Warning:' + fmt + '\n') - .format(file_name, line_number, *args)) - self.warnings += 1 - -class TestDescriptionExplorer: - """An iterator over test cases with descriptions. - -The test cases that have descriptions are: -* Individual unit tests (entries in a .data file) in test suites. -* Individual test cases in ssl-opt.sh. - -This is an abstract class. To use it, derive a class that implements -the process_test_case method, and call walk_all(). -""" - - def process_test_case(self, per_file_state, - file_name, line_number, description): - """Process a test case. - -per_file_state: an object created by new_per_file_state() at the beginning - of each file. -file_name: a relative path to the file containing the test case. -line_number: the line number in the given file. -description: the test case description as a byte string. -""" - raise NotImplementedError - - def new_per_file_state(self): - """Return a new per-file state object. - -The default per-file state object is None. Child classes that require per-file -state may override this method. 
-""" - #pylint: disable=no-self-use - return None - - def walk_test_suite(self, data_file_name): - """Iterate over the test cases in the given unit test data file.""" - in_paragraph = False - descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none - with open(data_file_name, 'rb') as data_file: - for line_number, line in enumerate(data_file, 1): - line = line.rstrip(b'\r\n') - if not line: - in_paragraph = False - continue - if line.startswith(b'#'): - continue - if not in_paragraph: - # This is a test case description line. - self.process_test_case(descriptions, - data_file_name, line_number, line) - in_paragraph = True - - def collect_from_script(self, script_name): - """Collect the test cases in a script by calling its listing test cases -option""" - descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none - listed = subprocess.check_output(['sh', script_name, '--list-test-cases']) - # Assume test file is responsible for printing identical format of - # test case description between --list-test-cases and its OUTCOME.CSV - # - # idx indicates the number of test case since there is no line number - # in the script for each test case. - for idx, line in enumerate(listed.splitlines()): - # We are expecting the script to list the test cases in - # `;` pattern. - script_outputs = line.split(b';', 1) - if len(script_outputs) == 2: - suite_name, description = script_outputs - else: - raise ScriptOutputError(script_name, idx, line.decode("utf-8")) - - self.process_test_case(descriptions, - suite_name.decode('utf-8'), - idx, - description.rstrip()) - - @staticmethod - def collect_test_directories(): - """Get the relative path for the TLS and Crypto test directories.""" - mbedtls_root = build_tree.guess_mbedtls_root() - directories = [os.path.join(mbedtls_root, 'tests'), - os.path.join(mbedtls_root, 'tf-psa-crypto', 'tests')] - directories = [os.path.relpath(p) for p in directories] - return directories - - def walk_all(self): - """Iterate over all named test cases.""" - test_directories = self.collect_test_directories() - for directory in test_directories: - for data_file_name in glob.glob(os.path.join(directory, 'suites', - '*.data')): - self.walk_test_suite(data_file_name) - - for sh_file in ['ssl-opt.sh', 'compat.sh']: - sh_file = os.path.join(directory, sh_file) - if os.path.isfile(sh_file): - self.collect_from_script(sh_file) - -class TestDescriptions(TestDescriptionExplorer): - """Collect the available test cases.""" - - def __init__(self): - super().__init__() - self.descriptions = set() - - def process_test_case(self, _per_file_state, - file_name, _line_number, description): - """Record an available test case.""" - base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name)) - key = ';'.join([base_name, description.decode('utf-8')]) - self.descriptions.add(key) - -def collect_available_test_cases(): - """Collect the available test cases.""" - explorer = TestDescriptions() - explorer.walk_all() - return sorted(explorer.descriptions) - -class DescriptionChecker(TestDescriptionExplorer): +class DescriptionChecker(collect_test_cases.TestDescriptionExplorer): """Check all test case descriptions. * Check that each description is valid (length, allowed character set, etc.). 
@@ -223,14 +67,14 @@ def main(): help='Show warnings (default: on; undoes --quiet)') options = parser.parse_args() if options.list_all: - descriptions = collect_available_test_cases() + descriptions = collect_test_cases.collect_available_test_cases() sys.stdout.write('\n'.join(descriptions + [''])) return - results = Results(options) + results = collect_test_cases.Results(options) checker = DescriptionChecker(results) try: checker.walk_all() - except ScriptOutputError as e: + except collect_test_cases.ScriptOutputError as e: results.error(e.script_name, e.idx, '"{}" should be listed as ";"', e.line) diff --git a/tests/scripts/collect_test_cases.py b/tests/scripts/collect_test_cases.py index 7e9031c970..c326710b67 100644 --- a/tests/scripts/collect_test_cases.py +++ b/tests/scripts/collect_test_cases.py @@ -2,3 +2,166 @@ # Copyright The Mbed TLS Contributors # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +import glob +import os +import re +import subprocess +import sys + +import scripts_path # pylint: disable=unused-import +from mbedtls_framework import build_tree + + +class ScriptOutputError(ValueError): + """A kind of ValueError that indicates we found + the script doesn't list test cases in an expected + pattern. + """ + + @property + def script_name(self): + return super().args[0] + + @property + def idx(self): + return super().args[1] + + @property + def line(self): + return super().args[2] + +class Results: + """Store file and line information about errors or warnings in test suites.""" + + def __init__(self, options): + self.errors = 0 + self.warnings = 0 + self.ignore_warnings = options.quiet + + def error(self, file_name, line_number, fmt, *args): + sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n'). + format(file_name, line_number, *args)) + self.errors += 1 + + def warning(self, file_name, line_number, fmt, *args): + if not self.ignore_warnings: + sys.stderr.write(('{}:{}:Warning:' + fmt + '\n') + .format(file_name, line_number, *args)) + self.warnings += 1 + +class TestDescriptionExplorer: + """An iterator over test cases with descriptions. + +The test cases that have descriptions are: +* Individual unit tests (entries in a .data file) in test suites. +* Individual test cases in ssl-opt.sh. + +This is an abstract class. To use it, derive a class that implements +the process_test_case method, and call walk_all(). +""" + + def process_test_case(self, per_file_state, + file_name, line_number, description): + """Process a test case. + +per_file_state: an object created by new_per_file_state() at the beginning + of each file. +file_name: a relative path to the file containing the test case. +line_number: the line number in the given file. +description: the test case description as a byte string. +""" + raise NotImplementedError + + def new_per_file_state(self): + """Return a new per-file state object. + +The default per-file state object is None. Child classes that require per-file +state may override this method. +""" + #pylint: disable=no-self-use + return None + + def walk_test_suite(self, data_file_name): + """Iterate over the test cases in the given unit test data file.""" + in_paragraph = False + descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none + with open(data_file_name, 'rb') as data_file: + for line_number, line in enumerate(data_file, 1): + line = line.rstrip(b'\r\n') + if not line: + in_paragraph = False + continue + if line.startswith(b'#'): + continue + if not in_paragraph: + # This is a test case description line. 
+ self.process_test_case(descriptions, + data_file_name, line_number, line) + in_paragraph = True + + def collect_from_script(self, script_name): + """Collect the test cases in a script by calling its listing test cases +option""" + descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none + listed = subprocess.check_output(['sh', script_name, '--list-test-cases']) + # Assume test file is responsible for printing identical format of + # test case description between --list-test-cases and its OUTCOME.CSV + # + # idx indicates the number of test case since there is no line number + # in the script for each test case. + for idx, line in enumerate(listed.splitlines()): + # We are expecting the script to list the test cases in + # `;` pattern. + script_outputs = line.split(b';', 1) + if len(script_outputs) == 2: + suite_name, description = script_outputs + else: + raise ScriptOutputError(script_name, idx, line.decode("utf-8")) + + self.process_test_case(descriptions, + suite_name.decode('utf-8'), + idx, + description.rstrip()) + + @staticmethod + def collect_test_directories(): + """Get the relative path for the TLS and Crypto test directories.""" + mbedtls_root = build_tree.guess_mbedtls_root() + directories = [os.path.join(mbedtls_root, 'tests'), + os.path.join(mbedtls_root, 'tf-psa-crypto', 'tests')] + directories = [os.path.relpath(p) for p in directories] + return directories + + def walk_all(self): + """Iterate over all named test cases.""" + test_directories = self.collect_test_directories() + for directory in test_directories: + for data_file_name in glob.glob(os.path.join(directory, 'suites', + '*.data')): + self.walk_test_suite(data_file_name) + + for sh_file in ['ssl-opt.sh', 'compat.sh']: + sh_file = os.path.join(directory, sh_file) + if os.path.isfile(sh_file): + self.collect_from_script(sh_file) + +class TestDescriptions(TestDescriptionExplorer): + """Collect the available test cases.""" + + def __init__(self): + super().__init__() + self.descriptions = set() + + def process_test_case(self, _per_file_state, + file_name, _line_number, description): + """Record an available test case.""" + base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name)) + key = ';'.join([base_name, description.decode('utf-8')]) + self.descriptions.add(key) + +def collect_available_test_cases(): + """Collect the available test cases.""" + explorer = TestDescriptions() + explorer.walk_all() + return sorted(explorer.descriptions) From efe084b2eeead0d3d2a1ecfd7e3f6e1212734986 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 17:38:32 +0200 Subject: [PATCH 04/16] Create a module to split branch-independent code out of analyze_outcomes.py Signed-off-by: Gilles Peskine --- tests/scripts/outcome_analysis.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/scripts/outcome_analysis.py diff --git a/tests/scripts/outcome_analysis.py b/tests/scripts/outcome_analysis.py new file mode 100644 index 0000000000..84b83eb81b --- /dev/null +++ b/tests/scripts/outcome_analysis.py @@ -0,0 +1,9 @@ +"""Outcome file analysis code. + +This module is the bulk of the code of tests/scripts/analyze_outcomes.py +in each consuming branch. The consuming script is expected to derive +the classes with branch-specific customizations such as ignore lists. 
+""" + +# Copyright The Mbed TLS Contributors +# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later From 38de3e5de11b85bcd3829c3ae7295ace1197df28 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 17:40:55 +0200 Subject: [PATCH 05/16] Remove sample ignore list elements for coverage The ignore list for coverage only has two test cases out of ~10000 that are currently reported as not executed. This is a drop in the sea and not useful. Remove them so that the class can be used generically. A follow-up will construct a comprehensive ignore list. Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 87efb1b89c..8123ea1ef5 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -167,14 +167,6 @@ class CoverageTask(Task): # Test cases whose suite and description are matched by an entry in # IGNORED_TESTS are expected to be never executed. # All other test cases are expected to be executed at least once. - IGNORED_TESTS = { - 'test_suite_psa_crypto_metadata': [ - # Algorithm not supported yet - 'Asymmetric signature: pure EdDSA', - # Algorithm not supported yet - 'Cipher: XTS', - ], - } def __init__(self, options) -> None: super().__init__(options) From 40a98a4b649427533cf6951dcc8f59d61b4ccb31 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:18:33 +0200 Subject: [PATCH 06/16] Missing NotImplementedError in abstract method Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 8123ea1ef5..decc33496a 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -135,6 +135,7 @@ class Task: def section_name(self) -> str: """The section name to use in results.""" + raise NotImplementedError def ignored_tests(self, test_suite: str) -> typing.Iterator[IgnoreEntry]: """Generate the ignore list for the specified test suite.""" From c2df8d4e9be422cf0cee18cb3a0664f3b230e322 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:27:13 +0200 Subject: [PATCH 07/16] Don't reuse a variable name inside a function Use different names for task name, a task class and a task instance. The interpreter doesn't care, but it's less confusing for both humans and type checkers. 
Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index decc33496a..9f8c2c3c71 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -784,17 +784,17 @@ def main(): options = parser.parse_args() if options.list: - for task in KNOWN_TASKS: - print(task) + for task_name in KNOWN_TASKS: + print(task_name) sys.exit(0) if options.specified_tasks == 'all': tasks_list = KNOWN_TASKS.keys() else: tasks_list = re.split(r'[, ]+', options.specified_tasks) - for task in tasks_list: - if task not in KNOWN_TASKS: - sys.stderr.write('invalid task: {}\n'.format(task)) + for task_name in tasks_list: + if task_name not in KNOWN_TASKS: + sys.stderr.write('invalid task: {}\n'.format(task_name)) sys.exit(2) # If the outcome file exists, parse it once and share the result @@ -806,22 +806,22 @@ def main(): sys.exit(2) task_name = tasks_list[0] - task = KNOWN_TASKS[task_name] - if not issubclass(task, DriverVSReference): + task_class = KNOWN_TASKS[task_name] + if not issubclass(task_class, DriverVSReference): sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) sys.exit(2) execute_reference_driver_tests(main_results, - task.REFERENCE, - task.DRIVER, + task_class.REFERENCE, + task_class.DRIVER, options.outcomes) outcomes = read_outcome_file(options.outcomes) for task_name in tasks_list: task_constructor = KNOWN_TASKS[task_name] - task = task_constructor(options) - main_results.new_section(task.section_name()) - task.run(main_results, outcomes) + task_instance = task_constructor(options) + main_results.new_section(task_instance.section_name()) + task_instance.run(main_results, outcomes) main_results.info("Overall results: {} warnings and {} errors", main_results.warning_count, main_results.error_count) From 4d557d8b76d0fa5f9ea05daf2b5949f86c5101b7 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:31:38 +0200 Subject: [PATCH 08/16] Typecheck main Always have tasks_list be a list, not potentially some fancier iterable. Bypass mypy's somewhat legitimate complaint about REFERENCE and DRIVER in task_class: they could potentially be instance attributes, but we rely on them being class attributes. Python does normally guarantee their existence as class attributes (unless a derived class explicitly deletes them), but they could be overridden by an instance attribute; that's just something we don't do, so the class attribute's value is legitimate. We can't expect mypy to know that, so work around its complaint. 
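
For illustration, the shape of the workaround (a minimal sketch with
hypothetical names, not code from this patch):

    from typing import Type

    class DriverVSReference:
        # Overridden as class attributes in each derived class.
        REFERENCE = ''
        DRIVER = ''

    class ExampleTask(DriverVSReference):
        REFERENCE = 'test_reference_component'
        DRIVER = 'test_driver_component'

    def components(task_class: Type[DriverVSReference]) -> str:
        # Per the reasoning above, mypy does not trust direct access
        # to these class attributes (in principle they could be
        # shadowed by instance attributes), so read them through
        # getattr() on the class object instead of task_class.REFERENCE.
        return getattr(task_class, 'REFERENCE') + ' / ' + \
               getattr(task_class, 'DRIVER')

    print(components(ExampleTask))
    # -> test_reference_component / test_driver_component
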
Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 9f8c2c3c71..f681a30ba2 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -761,8 +761,7 @@ KNOWN_TASKS = { 'analyze_block_cipher_dispatch': DriverVSReference_block_cipher_dispatch, } - -def main(): +def main() -> None: main_results = Results() try: @@ -789,7 +788,7 @@ def main(): sys.exit(0) if options.specified_tasks == 'all': - tasks_list = KNOWN_TASKS.keys() + tasks_list = list(KNOWN_TASKS.keys()) else: tasks_list = re.split(r'[, ]+', options.specified_tasks) for task_name in tasks_list: @@ -810,9 +809,16 @@ def main(): if not issubclass(task_class, DriverVSReference): sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) sys.exit(2) + # mypy isn't smart enough to know that REFERENCE and DRIVER + # are *class* attributes of all classes derived from + # DriverVSReference. (It would be smart enough if we had an + # instance of task_class, but we can't construct an instance + # until we have the outcome data, so at this point we only + # have the class.) So we use indirection to access the class + # attributes. execute_reference_driver_tests(main_results, - task_class.REFERENCE, - task_class.DRIVER, + getattr(task_class, 'REFERENCE'), + getattr(task_class, 'DRIVER'), options.outcomes) outcomes = read_outcome_file(options.outcomes) From 39f5d796aeb0a793ae72c636ec67234364fc6866 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:36:09 +0200 Subject: [PATCH 09/16] Pass KNOWN_TASKS as an argument to main Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index f681a30ba2..8dd8f59a1d 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -761,7 +761,7 @@ KNOWN_TASKS = { 'analyze_block_cipher_dispatch': DriverVSReference_block_cipher_dispatch, } -def main() -> None: +def main(known_tasks: typing.Dict[str, typing.Type[Task]]) -> None: main_results = Results() try: @@ -783,16 +783,16 @@ def main() -> None: options = parser.parse_args() if options.list: - for task_name in KNOWN_TASKS: + for task_name in known_tasks: print(task_name) sys.exit(0) if options.specified_tasks == 'all': - tasks_list = list(KNOWN_TASKS.keys()) + tasks_list = list(known_tasks.keys()) else: tasks_list = re.split(r'[, ]+', options.specified_tasks) for task_name in tasks_list: - if task_name not in KNOWN_TASKS: + if task_name not in known_tasks: sys.stderr.write('invalid task: {}\n'.format(task_name)) sys.exit(2) @@ -805,7 +805,7 @@ def main() -> None: sys.exit(2) task_name = tasks_list[0] - task_class = KNOWN_TASKS[task_name] + task_class = known_tasks[task_name] if not issubclass(task_class, DriverVSReference): sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) sys.exit(2) @@ -824,7 +824,7 @@ def main() -> None: outcomes = read_outcome_file(options.outcomes) for task_name in tasks_list: - task_constructor = KNOWN_TASKS[task_name] + task_constructor = known_tasks[task_name] task_instance = task_constructor(options) main_results.new_section(task_instance.section_name()) task_instance.run(main_results, outcomes) @@ -840,4 +840,4 @@ def main() -> None: sys.exit(120) if __name__ == 
'__main__': - main() + main(KNOWN_TASKS) From 45a32b154978affabef2ec76ff47e81960ae0a6a Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:42:37 +0200 Subject: [PATCH 10/16] Separate code and data of outcome analysis Place the code of outcome analysis (auxiliary functions, tasks, command line entry point) into a separate module, which will be moved to the version-independent framework repository so that it can be shared between maintained branches. Keep the branch-specific list of driver components and ignore lists in the per-repository script. We keep the executable script at `tests/scripts/analyze_outcomes.py`. It's simpler that way, because that path is hard-coded in CI scripts. Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 375 ++---------------------------- tests/scripts/outcome_analysis.py | 353 ++++++++++++++++++++++++++++ 2 files changed, 368 insertions(+), 360 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 8dd8f59a1d..080a4fb589 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -6,279 +6,13 @@ This script can also run on outcomes from a partial run, but the results are less likely to be useful. """ -import argparse -import sys -import traceback import re -import subprocess -import os -import typing -import collect_test_cases +import outcome_analysis -# `ComponentOutcomes` is a named tuple which is defined as: -# ComponentOutcomes( -# successes = { -# "", -# ... -# }, -# failures = { -# "", -# ... -# } -# ) -# suite_case = ";" -ComponentOutcomes = typing.NamedTuple('ComponentOutcomes', - [('successes', typing.Set[str]), - ('failures', typing.Set[str])]) - -# `Outcomes` is a representation of the outcomes file, -# which defined as: -# Outcomes = { -# "": ComponentOutcomes, -# ... -# } -Outcomes = typing.Dict[str, ComponentOutcomes] - - -class Results: - """Process analysis results.""" - - def __init__(self): - self.error_count = 0 - self.warning_count = 0 - - def new_section(self, fmt, *args, **kwargs): - self._print_line('\n*** ' + fmt + ' ***\n', *args, **kwargs) - - def info(self, fmt, *args, **kwargs): - self._print_line('Info: ' + fmt, *args, **kwargs) - - def error(self, fmt, *args, **kwargs): - self.error_count += 1 - self._print_line('Error: ' + fmt, *args, **kwargs) - - def warning(self, fmt, *args, **kwargs): - self.warning_count += 1 - self._print_line('Warning: ' + fmt, *args, **kwargs) - - @staticmethod - def _print_line(fmt, *args, **kwargs): - sys.stderr.write((fmt + '\n').format(*args, **kwargs)) - -def execute_reference_driver_tests(results: Results, ref_component: str, driver_component: str, \ - outcome_file: str) -> None: - """Run the tests specified in ref_component and driver_component. Results - are stored in the output_file and they will be used for the following - coverage analysis""" - results.new_section("Test {} and {}", ref_component, driver_component) - - shell_command = "tests/scripts/all.sh --outcome-file " + outcome_file + \ - " " + ref_component + " " + driver_component - results.info("Running: {}", shell_command) - ret_val = subprocess.run(shell_command.split(), check=False).returncode - - if ret_val != 0: - results.error("failed to run reference/driver components") - -IgnoreEntry = typing.Union[str, typing.Pattern] - -def name_matches_pattern(name: str, str_or_re: IgnoreEntry) -> bool: - """Check if name matches a pattern, that may be a string or regex. - - If the pattern is a string, name must be equal to match. 
- - If the pattern is a regex, name must fully match. - """ - # The CI's python is too old for re.Pattern - #if isinstance(str_or_re, re.Pattern): - if not isinstance(str_or_re, str): - return str_or_re.fullmatch(name) is not None - else: - return str_or_re == name - -def read_outcome_file(outcome_file: str) -> Outcomes: - """Parse an outcome file and return an outcome collection. - """ - outcomes = {} - with open(outcome_file, 'r', encoding='utf-8') as input_file: - for line in input_file: - (_platform, component, suite, case, result, _cause) = line.split(';') - # Note that `component` is not unique. If a test case passes on Linux - # and fails on FreeBSD, it'll end up in both the successes set and - # the failures set. - suite_case = ';'.join([suite, case]) - if component not in outcomes: - outcomes[component] = ComponentOutcomes(set(), set()) - if result == 'PASS': - outcomes[component].successes.add(suite_case) - elif result == 'FAIL': - outcomes[component].failures.add(suite_case) - - return outcomes - - -class Task: - """Base class for outcome analysis tasks.""" - - # Override the following in child classes. - # Map test suite names (with the test_suite_prefix) to a list of ignored - # test cases. Each element in the list can be either a string or a regex; - # see the `name_matches_pattern` function. - IGNORED_TESTS = {} #type: typing.Dict[str, typing.List[IgnoreEntry]] - - def __init__(self, options) -> None: - """Pass command line options to the tasks. - - Each task decides which command line options it cares about. - """ - pass - - def section_name(self) -> str: - """The section name to use in results.""" - raise NotImplementedError - - def ignored_tests(self, test_suite: str) -> typing.Iterator[IgnoreEntry]: - """Generate the ignore list for the specified test suite.""" - if test_suite in self.IGNORED_TESTS: - yield from self.IGNORED_TESTS[test_suite] - pos = test_suite.find('.') - if pos != -1: - base_test_suite = test_suite[:pos] - if base_test_suite in self.IGNORED_TESTS: - yield from self.IGNORED_TESTS[base_test_suite] - - def is_test_case_ignored(self, test_suite: str, test_string: str) -> bool: - """Check if the specified test case is ignored.""" - for str_or_re in self.ignored_tests(test_suite): - if name_matches_pattern(test_string, str_or_re): - return True - return False - - def run(self, results: Results, outcomes: Outcomes): - """Run the analysis on the specified outcomes. - - Signal errors via the results objects - """ - raise NotImplementedError - - -class CoverageTask(Task): - """Analyze test coverage.""" - - # Test cases whose suite and description are matched by an entry in - # IGNORED_TESTS are expected to be never executed. - # All other test cases are expected to be executed at least once. - - def __init__(self, options) -> None: - super().__init__(options) - self.full_coverage = options.full_coverage #type: bool - - @staticmethod - def section_name() -> str: - return "Analyze coverage" - - def run(self, results: Results, outcomes: Outcomes) -> None: - """Check that all available test cases are executed at least once.""" - # Make sure that the generated data files are present (and up-to-date). - # This allows analyze_outcomes.py to run correctly on a fresh Git - # checkout. - cp = subprocess.run(['make', 'generated_files'], - cwd='tests', - stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - check=False) - if cp.returncode != 0: - sys.stderr.write(cp.stdout.decode('utf-8')) - results.error("Failed \"make generated_files\" in tests. 
" - "Coverage analysis may be incorrect.") - available = collect_test_cases.collect_available_test_cases() - for suite_case in available: - hit = any(suite_case in comp_outcomes.successes or - suite_case in comp_outcomes.failures - for comp_outcomes in outcomes.values()) - (test_suite, test_description) = suite_case.split(';') - ignored = self.is_test_case_ignored(test_suite, test_description) - - if not hit and not ignored: - if self.full_coverage: - results.error('Test case not executed: {}', suite_case) - else: - results.warning('Test case not executed: {}', suite_case) - elif hit and ignored: - # If a test case is no longer always skipped, we should remove - # it from the ignore list. - if self.full_coverage: - results.error('Test case was executed but marked as ignored for coverage: {}', - suite_case) - else: - results.warning('Test case was executed but marked as ignored for coverage: {}', - suite_case) - - -class DriverVSReference(Task): - """Compare outcomes from testing with and without a driver. - - There are 2 options to use analyze_driver_vs_reference_xxx locally: - 1. Run tests and then analysis: - - tests/scripts/all.sh --outcome-file "$PWD/out.csv" - - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx - 2. Let this script run both automatically: - - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx - """ - - # Override the following in child classes. - # Configuration name (all.sh component) used as the reference. - REFERENCE = '' - # Configuration name (all.sh component) used as the driver. - DRIVER = '' - # Ignored test suites (without the test_suite_ prefix). - IGNORED_SUITES = [] #type: typing.List[str] - - def __init__(self, options) -> None: - super().__init__(options) - self.ignored_suites = frozenset('test_suite_' + x - for x in self.IGNORED_SUITES) - - def section_name(self) -> str: - return f"Analyze driver {self.DRIVER} vs reference {self.REFERENCE}" - - def run(self, results: Results, outcomes: Outcomes) -> None: - """Check that all tests passing in the driver component are also - passing in the corresponding reference component. - Skip: - - full test suites provided in ignored_suites list - - only some specific test inside a test suite, for which the corresponding - output string is provided - """ - ref_outcomes = outcomes.get("component_" + self.REFERENCE) - driver_outcomes = outcomes.get("component_" + self.DRIVER) - - if ref_outcomes is None or driver_outcomes is None: - results.error("required components are missing: bad outcome file?") - return - - if not ref_outcomes.successes: - results.error("no passing test in reference component: bad outcome file?") - return - - for suite_case in ref_outcomes.successes: - # suite_case is like "test_suite_foo.bar;Description of test case" - (full_test_suite, test_string) = suite_case.split(';') - test_suite = full_test_suite.split('.')[0] # retrieve main part of test suite name - - # Immediately skip fully-ignored test suites - if test_suite in self.ignored_suites or \ - full_test_suite in self.ignored_suites: - continue - - # For ignored test cases inside test suites, just remember and: - # don't issue an error if they're skipped with drivers, - # but issue an error if they're not (means we have a bad entry). 
- ignored = self.is_test_case_ignored(full_test_suite, test_string) - - if not ignored and not suite_case in driver_outcomes.successes: - results.error("SKIP/FAIL -> PASS: {}", suite_case) - if ignored and suite_case in driver_outcomes.successes: - results.error("uselessly ignored: {}", suite_case) +class CoverageTask(outcome_analysis.CoverageTask): + pass # We'll populate IGNORED_TESTS soon # The names that we give to classes derived from DriverVSReference do not @@ -288,7 +22,7 @@ class DriverVSReference(Task): # documentation. #pylint: disable=invalid-name,missing-class-docstring -class DriverVSReference_hash(DriverVSReference): +class DriverVSReference_hash(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_hash_use_psa' DRIVER = 'test_psa_crypto_config_accel_hash_use_psa' IGNORED_SUITES = [ @@ -308,7 +42,7 @@ class DriverVSReference_hash(DriverVSReference): ], } -class DriverVSReference_hmac(DriverVSReference): +class DriverVSReference_hmac(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_hmac' DRIVER = 'test_psa_crypto_config_accel_hmac' IGNORED_SUITES = [ @@ -347,7 +81,7 @@ class DriverVSReference_hmac(DriverVSReference): ], } -class DriverVSReference_cipher_aead_cmac(DriverVSReference): +class DriverVSReference_cipher_aead_cmac(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_cipher_aead_cmac' DRIVER = 'test_psa_crypto_config_accel_cipher_aead_cmac' # Modules replaced by drivers. @@ -414,7 +148,7 @@ class DriverVSReference_cipher_aead_cmac(DriverVSReference): ], } -class DriverVSReference_ecp_light_only(DriverVSReference): +class DriverVSReference_ecp_light_only(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ecc_ecp_light_only' DRIVER = 'test_psa_crypto_config_accel_ecc_ecp_light_only' IGNORED_SUITES = [ @@ -454,7 +188,7 @@ class DriverVSReference_ecp_light_only(DriverVSReference): ], } -class DriverVSReference_no_ecp_at_all(DriverVSReference): +class DriverVSReference_no_ecp_at_all(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ecc_no_ecp_at_all' DRIVER = 'test_psa_crypto_config_accel_ecc_no_ecp_at_all' IGNORED_SUITES = [ @@ -492,7 +226,7 @@ class DriverVSReference_no_ecp_at_all(DriverVSReference): ], } -class DriverVSReference_ecc_no_bignum(DriverVSReference): +class DriverVSReference_ecc_no_bignum(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ecc_no_bignum' DRIVER = 'test_psa_crypto_config_accel_ecc_no_bignum' IGNORED_SUITES = [ @@ -537,7 +271,7 @@ class DriverVSReference_ecc_no_bignum(DriverVSReference): ], } -class DriverVSReference_ecc_ffdh_no_bignum(DriverVSReference): +class DriverVSReference_ecc_ffdh_no_bignum(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ecc_ffdh_no_bignum' DRIVER = 'test_psa_crypto_config_accel_ecc_ffdh_no_bignum' IGNORED_SUITES = [ @@ -590,7 +324,7 @@ class DriverVSReference_ecc_ffdh_no_bignum(DriverVSReference): ], } -class DriverVSReference_ffdh_alg(DriverVSReference): +class DriverVSReference_ffdh_alg(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ffdh' DRIVER = 'test_psa_crypto_config_accel_ffdh' IGNORED_SUITES = ['dhm'] @@ -606,7 +340,7 @@ class DriverVSReference_ffdh_alg(DriverVSReference): ], } -class DriverVSReference_tfm_config(DriverVSReference): +class DriverVSReference_tfm_config(outcome_analysis.DriverVSReference): REFERENCE = 'test_tfm_config_no_p256m' DRIVER = 
'test_tfm_config_p256m_driver_accel_ec' IGNORED_SUITES = [ @@ -638,7 +372,7 @@ class DriverVSReference_tfm_config(DriverVSReference): ], } -class DriverVSReference_rsa(DriverVSReference): +class DriverVSReference_rsa(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_rsa_crypto' DRIVER = 'test_psa_crypto_config_accel_rsa_crypto' IGNORED_SUITES = [ @@ -677,7 +411,7 @@ class DriverVSReference_rsa(DriverVSReference): ], } -class DriverVSReference_block_cipher_dispatch(DriverVSReference): +class DriverVSReference_block_cipher_dispatch(outcome_analysis.DriverVSReference): REFERENCE = 'test_full_block_cipher_legacy_dispatch' DRIVER = 'test_full_block_cipher_psa_dispatch' IGNORED_SUITES = [ @@ -744,7 +478,6 @@ class DriverVSReference_block_cipher_dispatch(DriverVSReference): #pylint: enable=invalid-name,missing-class-docstring - # List of tasks with a function that can handle this task and additional arguments if required KNOWN_TASKS = { 'analyze_coverage': CoverageTask, @@ -761,83 +494,5 @@ KNOWN_TASKS = { 'analyze_block_cipher_dispatch': DriverVSReference_block_cipher_dispatch, } -def main(known_tasks: typing.Dict[str, typing.Type[Task]]) -> None: - main_results = Results() - - try: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('outcomes', metavar='OUTCOMES.CSV', - help='Outcome file to analyze') - parser.add_argument('specified_tasks', default='all', nargs='?', - help='Analysis to be done. By default, run all tasks. ' - 'With one or more TASK, run only those. ' - 'TASK can be the name of a single task or ' - 'comma/space-separated list of tasks. ') - parser.add_argument('--list', action='store_true', - help='List all available tasks and exit.') - parser.add_argument('--require-full-coverage', action='store_true', - dest='full_coverage', help="Require all available " - "test cases to be executed and issue an error " - "otherwise. This flag is ignored if 'task' is " - "neither 'all' nor 'analyze_coverage'") - options = parser.parse_args() - - if options.list: - for task_name in known_tasks: - print(task_name) - sys.exit(0) - - if options.specified_tasks == 'all': - tasks_list = list(known_tasks.keys()) - else: - tasks_list = re.split(r'[, ]+', options.specified_tasks) - for task_name in tasks_list: - if task_name not in known_tasks: - sys.stderr.write('invalid task: {}\n'.format(task_name)) - sys.exit(2) - - # If the outcome file exists, parse it once and share the result - # among tasks to improve performance. - # Otherwise, it will be generated by execute_reference_driver_tests. - if not os.path.exists(options.outcomes): - if len(tasks_list) > 1: - sys.stderr.write("mutiple tasks found, please provide a valid outcomes file.\n") - sys.exit(2) - - task_name = tasks_list[0] - task_class = known_tasks[task_name] - if not issubclass(task_class, DriverVSReference): - sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) - sys.exit(2) - # mypy isn't smart enough to know that REFERENCE and DRIVER - # are *class* attributes of all classes derived from - # DriverVSReference. (It would be smart enough if we had an - # instance of task_class, but we can't construct an instance - # until we have the outcome data, so at this point we only - # have the class.) So we use indirection to access the class - # attributes. 
- execute_reference_driver_tests(main_results, - getattr(task_class, 'REFERENCE'), - getattr(task_class, 'DRIVER'), - options.outcomes) - - outcomes = read_outcome_file(options.outcomes) - - for task_name in tasks_list: - task_constructor = known_tasks[task_name] - task_instance = task_constructor(options) - main_results.new_section(task_instance.section_name()) - task_instance.run(main_results, outcomes) - - main_results.info("Overall results: {} warnings and {} errors", - main_results.warning_count, main_results.error_count) - - sys.exit(0 if (main_results.error_count == 0) else 1) - - except Exception: # pylint: disable=broad-except - # Print the backtrace and exit explicitly with our chosen status. - traceback.print_exc() - sys.exit(120) - if __name__ == '__main__': - main(KNOWN_TASKS) + outcome_analysis.main(KNOWN_TASKS) diff --git a/tests/scripts/outcome_analysis.py b/tests/scripts/outcome_analysis.py index 84b83eb81b..e2ad3e7581 100644 --- a/tests/scripts/outcome_analysis.py +++ b/tests/scripts/outcome_analysis.py @@ -7,3 +7,356 @@ the classes with branch-specific customizations such as ignore lists. # Copyright The Mbed TLS Contributors # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +import argparse +import sys +import traceback +import re +import subprocess +import os +import typing + +import collect_test_cases + + +# `ComponentOutcomes` is a named tuple which is defined as: +# ComponentOutcomes( +# successes = { +# "", +# ... +# }, +# failures = { +# "", +# ... +# } +# ) +# suite_case = ";" +ComponentOutcomes = typing.NamedTuple('ComponentOutcomes', + [('successes', typing.Set[str]), + ('failures', typing.Set[str])]) + +# `Outcomes` is a representation of the outcomes file, +# which defined as: +# Outcomes = { +# "": ComponentOutcomes, +# ... +# } +Outcomes = typing.Dict[str, ComponentOutcomes] + + +class Results: + """Process analysis results.""" + + def __init__(self): + self.error_count = 0 + self.warning_count = 0 + + def new_section(self, fmt, *args, **kwargs): + self._print_line('\n*** ' + fmt + ' ***\n', *args, **kwargs) + + def info(self, fmt, *args, **kwargs): + self._print_line('Info: ' + fmt, *args, **kwargs) + + def error(self, fmt, *args, **kwargs): + self.error_count += 1 + self._print_line('Error: ' + fmt, *args, **kwargs) + + def warning(self, fmt, *args, **kwargs): + self.warning_count += 1 + self._print_line('Warning: ' + fmt, *args, **kwargs) + + @staticmethod + def _print_line(fmt, *args, **kwargs): + sys.stderr.write((fmt + '\n').format(*args, **kwargs)) + +def execute_reference_driver_tests(results: Results, ref_component: str, driver_component: str, \ + outcome_file: str) -> None: + """Run the tests specified in ref_component and driver_component. Results + are stored in the output_file and they will be used for the following + coverage analysis""" + results.new_section("Test {} and {}", ref_component, driver_component) + + shell_command = "tests/scripts/all.sh --outcome-file " + outcome_file + \ + " " + ref_component + " " + driver_component + results.info("Running: {}", shell_command) + ret_val = subprocess.run(shell_command.split(), check=False).returncode + + if ret_val != 0: + results.error("failed to run reference/driver components") + +IgnoreEntry = typing.Union[str, typing.Pattern] + +def name_matches_pattern(name: str, str_or_re: IgnoreEntry) -> bool: + """Check if name matches a pattern, that may be a string or regex. + - If the pattern is a string, name must be equal to match. + - If the pattern is a regex, name must fully match. 
+ """ + # The CI's python is too old for re.Pattern + #if isinstance(str_or_re, re.Pattern): + if not isinstance(str_or_re, str): + return str_or_re.fullmatch(name) is not None + else: + return str_or_re == name + +def read_outcome_file(outcome_file: str) -> Outcomes: + """Parse an outcome file and return an outcome collection. + """ + outcomes = {} + with open(outcome_file, 'r', encoding='utf-8') as input_file: + for line in input_file: + (_platform, component, suite, case, result, _cause) = line.split(';') + # Note that `component` is not unique. If a test case passes on Linux + # and fails on FreeBSD, it'll end up in both the successes set and + # the failures set. + suite_case = ';'.join([suite, case]) + if component not in outcomes: + outcomes[component] = ComponentOutcomes(set(), set()) + if result == 'PASS': + outcomes[component].successes.add(suite_case) + elif result == 'FAIL': + outcomes[component].failures.add(suite_case) + + return outcomes + + +class Task: + """Base class for outcome analysis tasks.""" + + # Override the following in child classes. + # Map test suite names (with the test_suite_prefix) to a list of ignored + # test cases. Each element in the list can be either a string or a regex; + # see the `name_matches_pattern` function. + IGNORED_TESTS = {} #type: typing.Dict[str, typing.List[IgnoreEntry]] + + def __init__(self, options) -> None: + """Pass command line options to the tasks. + + Each task decides which command line options it cares about. + """ + pass + + def section_name(self) -> str: + """The section name to use in results.""" + raise NotImplementedError + + def ignored_tests(self, test_suite: str) -> typing.Iterator[IgnoreEntry]: + """Generate the ignore list for the specified test suite.""" + if test_suite in self.IGNORED_TESTS: + yield from self.IGNORED_TESTS[test_suite] + pos = test_suite.find('.') + if pos != -1: + base_test_suite = test_suite[:pos] + if base_test_suite in self.IGNORED_TESTS: + yield from self.IGNORED_TESTS[base_test_suite] + + def is_test_case_ignored(self, test_suite: str, test_string: str) -> bool: + """Check if the specified test case is ignored.""" + for str_or_re in self.ignored_tests(test_suite): + if name_matches_pattern(test_string, str_or_re): + return True + return False + + def run(self, results: Results, outcomes: Outcomes): + """Run the analysis on the specified outcomes. + + Signal errors via the results objects + """ + raise NotImplementedError + + +class CoverageTask(Task): + """Analyze test coverage.""" + + # Test cases whose suite and description are matched by an entry in + # IGNORED_TESTS are expected to be never executed. + # All other test cases are expected to be executed at least once. + + def __init__(self, options) -> None: + super().__init__(options) + self.full_coverage = options.full_coverage #type: bool + + @staticmethod + def section_name() -> str: + return "Analyze coverage" + + def run(self, results: Results, outcomes: Outcomes) -> None: + """Check that all available test cases are executed at least once.""" + # Make sure that the generated data files are present (and up-to-date). + # This allows analyze_outcomes.py to run correctly on a fresh Git + # checkout. + cp = subprocess.run(['make', 'generated_files'], + cwd='tests', + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + check=False) + if cp.returncode != 0: + sys.stderr.write(cp.stdout.decode('utf-8')) + results.error("Failed \"make generated_files\" in tests. 
" + "Coverage analysis may be incorrect.") + available = collect_test_cases.collect_available_test_cases() + for suite_case in available: + hit = any(suite_case in comp_outcomes.successes or + suite_case in comp_outcomes.failures + for comp_outcomes in outcomes.values()) + (test_suite, test_description) = suite_case.split(';') + ignored = self.is_test_case_ignored(test_suite, test_description) + + if not hit and not ignored: + if self.full_coverage: + results.error('Test case not executed: {}', suite_case) + else: + results.warning('Test case not executed: {}', suite_case) + elif hit and ignored: + # If a test case is no longer always skipped, we should remove + # it from the ignore list. + if self.full_coverage: + results.error('Test case was executed but marked as ignored for coverage: {}', + suite_case) + else: + results.warning('Test case was executed but marked as ignored for coverage: {}', + suite_case) + + +class DriverVSReference(Task): + """Compare outcomes from testing with and without a driver. + + There are 2 options to use analyze_driver_vs_reference_xxx locally: + 1. Run tests and then analysis: + - tests/scripts/all.sh --outcome-file "$PWD/out.csv" + - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx + 2. Let this script run both automatically: + - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx + """ + + # Override the following in child classes. + # Configuration name (all.sh component) used as the reference. + REFERENCE = '' + # Configuration name (all.sh component) used as the driver. + DRIVER = '' + # Ignored test suites (without the test_suite_ prefix). + IGNORED_SUITES = [] #type: typing.List[str] + + def __init__(self, options) -> None: + super().__init__(options) + self.ignored_suites = frozenset('test_suite_' + x + for x in self.IGNORED_SUITES) + + def section_name(self) -> str: + return f"Analyze driver {self.DRIVER} vs reference {self.REFERENCE}" + + def run(self, results: Results, outcomes: Outcomes) -> None: + """Check that all tests passing in the driver component are also + passing in the corresponding reference component. + Skip: + - full test suites provided in ignored_suites list + - only some specific test inside a test suite, for which the corresponding + output string is provided + """ + ref_outcomes = outcomes.get("component_" + self.REFERENCE) + driver_outcomes = outcomes.get("component_" + self.DRIVER) + + if ref_outcomes is None or driver_outcomes is None: + results.error("required components are missing: bad outcome file?") + return + + if not ref_outcomes.successes: + results.error("no passing test in reference component: bad outcome file?") + return + + for suite_case in ref_outcomes.successes: + # suite_case is like "test_suite_foo.bar;Description of test case" + (full_test_suite, test_string) = suite_case.split(';') + test_suite = full_test_suite.split('.')[0] # retrieve main part of test suite name + + # Immediately skip fully-ignored test suites + if test_suite in self.ignored_suites or \ + full_test_suite in self.ignored_suites: + continue + + # For ignored test cases inside test suites, just remember and: + # don't issue an error if they're skipped with drivers, + # but issue an error if they're not (means we have a bad entry). 
+ ignored = self.is_test_case_ignored(full_test_suite, test_string) + + if not ignored and not suite_case in driver_outcomes.successes: + results.error("SKIP/FAIL -> PASS: {}", suite_case) + if ignored and suite_case in driver_outcomes.successes: + results.error("uselessly ignored: {}", suite_case) + + +def main(known_tasks: typing.Dict[str, typing.Type[Task]]) -> None: + main_results = Results() + + try: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('outcomes', metavar='OUTCOMES.CSV', + help='Outcome file to analyze') + parser.add_argument('specified_tasks', default='all', nargs='?', + help='Analysis to be done. By default, run all tasks. ' + 'With one or more TASK, run only those. ' + 'TASK can be the name of a single task or ' + 'comma/space-separated list of tasks. ') + parser.add_argument('--list', action='store_true', + help='List all available tasks and exit.') + parser.add_argument('--require-full-coverage', action='store_true', + dest='full_coverage', help="Require all available " + "test cases to be executed and issue an error " + "otherwise. This flag is ignored if 'task' is " + "neither 'all' nor 'analyze_coverage'") + options = parser.parse_args() + + if options.list: + for task_name in known_tasks: + print(task_name) + sys.exit(0) + + if options.specified_tasks == 'all': + tasks_list = list(known_tasks.keys()) + else: + tasks_list = re.split(r'[, ]+', options.specified_tasks) + for task_name in tasks_list: + if task_name not in known_tasks: + sys.stderr.write('invalid task: {}\n'.format(task_name)) + sys.exit(2) + + # If the outcome file exists, parse it once and share the result + # among tasks to improve performance. + # Otherwise, it will be generated by execute_reference_driver_tests. + if not os.path.exists(options.outcomes): + if len(tasks_list) > 1: + sys.stderr.write("mutiple tasks found, please provide a valid outcomes file.\n") + sys.exit(2) + + task_name = tasks_list[0] + task_class = known_tasks[task_name] + if not issubclass(task_class, DriverVSReference): + sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) + sys.exit(2) + # mypy isn't smart enough to know that REFERENCE and DRIVER + # are *class* attributes of all classes derived from + # DriverVSReference. (It would be smart enough if we had an + # instance of task_class, but we can't construct an instance + # until we have the outcome data, so at this point we only + # have the class.) So we use indirection to access the class + # attributes. + execute_reference_driver_tests(main_results, + getattr(task_class, 'REFERENCE'), + getattr(task_class, 'DRIVER'), + options.outcomes) + + outcomes = read_outcome_file(options.outcomes) + + for task_name in tasks_list: + task_constructor = known_tasks[task_name] + task_instance = task_constructor(options) + main_results.new_section(task_instance.section_name()) + task_instance.run(main_results, outcomes) + + main_results.info("Overall results: {} warnings and {} errors", + main_results.warning_count, main_results.error_count) + + sys.exit(0 if (main_results.error_count == 0) else 1) + + except Exception: # pylint: disable=broad-except + # Print the backtrace and exit explicitly with our chosen status. 
+ traceback.print_exc() + sys.exit(120) From 738a59795329f49f35c3ff7632e19f6c1706ad29 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:52:58 +0200 Subject: [PATCH 11/16] Adjust paths for impending moves to the framework Signed-off-by: Gilles Peskine --- docs/architecture/testing/test-framework.md | 2 +- tests/scripts/analyze_outcomes.py | 3 ++- tests/scripts/check_test_cases.py | 2 +- tests/scripts/components-basic-checks.sh | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/architecture/testing/test-framework.md b/docs/architecture/testing/test-framework.md index 80667df920..a9e3dac47e 100644 --- a/docs/architecture/testing/test-framework.md +++ b/docs/architecture/testing/test-framework.md @@ -22,7 +22,7 @@ Each test case has a description which succinctly describes for a human audience * Make the description descriptive. “foo: x=2, y=4” is more descriptive than “foo #2”. “foo: 0 Date: Wed, 9 Oct 2024 13:49:40 +0200 Subject: [PATCH 12/16] Move test case analysis modules to framework repository Move `collect_test_cases.py` (split from `check_test_cases.py`), `check_test_cases.py`, and `outcome_analysis.py` (split from `analyze_outcomes.py`) to the framework repository. Signed-off-by: Gilles Peskine --- tests/scripts/check_test_cases.py | 87 ------- tests/scripts/collect_test_cases.py | 167 ------------- tests/scripts/outcome_analysis.py | 362 ---------------------------- 3 files changed, 616 deletions(-) delete mode 100755 tests/scripts/check_test_cases.py delete mode 100644 tests/scripts/collect_test_cases.py delete mode 100644 tests/scripts/outcome_analysis.py diff --git a/tests/scripts/check_test_cases.py b/tests/scripts/check_test_cases.py deleted file mode 100755 index f1185d2f7c..0000000000 --- a/tests/scripts/check_test_cases.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 - -"""Sanity checks for test data. - -This program contains a class for traversing test cases that can be used -independently of the checks. -""" - -# Copyright The Mbed TLS Contributors -# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later - -import argparse -import re -import sys - -import scripts_path # pylint: disable=unused-import -from mbedtls_framework import collect_test_cases - - -class DescriptionChecker(collect_test_cases.TestDescriptionExplorer): - """Check all test case descriptions. - -* Check that each description is valid (length, allowed character set, etc.). -* Check that there is no duplicated description inside of one test suite. 
-""" - - def __init__(self, results): - self.results = results - - def new_per_file_state(self): - """Dictionary mapping descriptions to their line number.""" - return {} - - def process_test_case(self, per_file_state, - file_name, line_number, description): - """Check test case descriptions for errors.""" - results = self.results - seen = per_file_state - if description in seen: - results.error(file_name, line_number, - 'Duplicate description (also line {})', - seen[description]) - return - if re.search(br'[\t;]', description): - results.error(file_name, line_number, - 'Forbidden character \'{}\' in description', - re.search(br'[\t;]', description).group(0).decode('ascii')) - if re.search(br'[^ -~]', description): - results.error(file_name, line_number, - 'Non-ASCII character in description') - if len(description) > 66: - results.warning(file_name, line_number, - 'Test description too long ({} > 66)', - len(description)) - seen[description] = line_number - -def main(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('--list-all', - action='store_true', - help='List all test cases, without doing checks') - parser.add_argument('--quiet', '-q', - action='store_true', - help='Hide warnings') - parser.add_argument('--verbose', '-v', - action='store_false', dest='quiet', - help='Show warnings (default: on; undoes --quiet)') - options = parser.parse_args() - if options.list_all: - descriptions = collect_test_cases.collect_available_test_cases() - sys.stdout.write('\n'.join(descriptions + [''])) - return - results = collect_test_cases.Results(options) - checker = DescriptionChecker(results) - try: - checker.walk_all() - except collect_test_cases.ScriptOutputError as e: - results.error(e.script_name, e.idx, - '"{}" should be listed as ";"', - e.line) - if (results.warnings or results.errors) and not options.quiet: - sys.stderr.write('{}: {} errors, {} warnings\n' - .format(sys.argv[0], results.errors, results.warnings)) - sys.exit(1 if results.errors else 0) - -if __name__ == '__main__': - main() diff --git a/tests/scripts/collect_test_cases.py b/tests/scripts/collect_test_cases.py deleted file mode 100644 index c326710b67..0000000000 --- a/tests/scripts/collect_test_cases.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Discover all the test cases (unit tests and SSL tests).""" - -# Copyright The Mbed TLS Contributors -# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later - -import glob -import os -import re -import subprocess -import sys - -import scripts_path # pylint: disable=unused-import -from mbedtls_framework import build_tree - - -class ScriptOutputError(ValueError): - """A kind of ValueError that indicates we found - the script doesn't list test cases in an expected - pattern. - """ - - @property - def script_name(self): - return super().args[0] - - @property - def idx(self): - return super().args[1] - - @property - def line(self): - return super().args[2] - -class Results: - """Store file and line information about errors or warnings in test suites.""" - - def __init__(self, options): - self.errors = 0 - self.warnings = 0 - self.ignore_warnings = options.quiet - - def error(self, file_name, line_number, fmt, *args): - sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n'). 
-                         format(file_name, line_number, *args))
-        self.errors += 1
-
-    def warning(self, file_name, line_number, fmt, *args):
-        if not self.ignore_warnings:
-            sys.stderr.write(('{}:{}:Warning:' + fmt + '\n')
-                             .format(file_name, line_number, *args))
-            self.warnings += 1
-
-class TestDescriptionExplorer:
-    """An iterator over test cases with descriptions.
-
-The test cases that have descriptions are:
-* Individual unit tests (entries in a .data file) in test suites.
-* Individual test cases in ssl-opt.sh.
-
-This is an abstract class. To use it, derive a class that implements
-the process_test_case method, and call walk_all().
-"""
-
-    def process_test_case(self, per_file_state,
-                          file_name, line_number, description):
-        """Process a test case.
-
-per_file_state: an object created by new_per_file_state() at the beginning
-    of each file.
-file_name: a relative path to the file containing the test case.
-line_number: the line number in the given file.
-description: the test case description as a byte string.
-"""
-        raise NotImplementedError
-
-    def new_per_file_state(self):
-        """Return a new per-file state object.
-
-The default per-file state object is None. Child classes that require per-file
-state may override this method.
-"""
-        #pylint: disable=no-self-use
-        return None
-
-    def walk_test_suite(self, data_file_name):
-        """Iterate over the test cases in the given unit test data file."""
-        in_paragraph = False
-        descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
-        with open(data_file_name, 'rb') as data_file:
-            for line_number, line in enumerate(data_file, 1):
-                line = line.rstrip(b'\r\n')
-                if not line:
-                    in_paragraph = False
-                    continue
-                if line.startswith(b'#'):
-                    continue
-                if not in_paragraph:
-                    # This is a test case description line.
-                    self.process_test_case(descriptions,
-                                           data_file_name, line_number, line)
-                in_paragraph = True
-
-    def collect_from_script(self, script_name):
-        """Collect the test cases in a script by calling its listing test cases
-option"""
-        descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
-        listed = subprocess.check_output(['sh', script_name, '--list-test-cases'])
-        # Assume test file is responsible for printing identical format of
-        # test case description between --list-test-cases and its OUTCOME.CSV
-        #
-        # idx indicates the number of test case since there is no line number
-        # in the script for each test case.
-        for idx, line in enumerate(listed.splitlines()):
-            # We are expecting the script to list the test cases in
-            # `<suite_name>;<description>` pattern.
-            script_outputs = line.split(b';', 1)
-            if len(script_outputs) == 2:
-                suite_name, description = script_outputs
-            else:
-                raise ScriptOutputError(script_name, idx, line.decode("utf-8"))
-
-            self.process_test_case(descriptions,
-                                   suite_name.decode('utf-8'),
-                                   idx,
-                                   description.rstrip())
-
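For illustration, the listing contract that collect_from_script relies on looks like this; a hedged sketch with invented test case names:
```
# Hypothetical output of `sh ssl-opt.sh --list-test-cases`: one test case
# per line, in `<suite_name>;<description>` form.
listed = b'ssl-opt;Example handshake\nssl-opt;Example handshake, DTLS\n'

for idx, line in enumerate(listed.splitlines()):
    # Split at the first ';' only: the description may contain more text.
    suite_name, description = line.split(b';', 1)
    print(idx, suite_name.decode('utf-8'), description.decode('utf-8'))
# 0 ssl-opt Example handshake
# 1 ssl-opt Example handshake, DTLS
```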
-    @staticmethod
-    def collect_test_directories():
-        """Get the relative path for the TLS and Crypto test directories."""
-        mbedtls_root = build_tree.guess_mbedtls_root()
-        directories = [os.path.join(mbedtls_root, 'tests'),
-                       os.path.join(mbedtls_root, 'tf-psa-crypto', 'tests')]
-        directories = [os.path.relpath(p) for p in directories]
-        return directories
-
-    def walk_all(self):
-        """Iterate over all named test cases."""
-        test_directories = self.collect_test_directories()
-        for directory in test_directories:
-            for data_file_name in glob.glob(os.path.join(directory, 'suites',
-                                                         '*.data')):
-                self.walk_test_suite(data_file_name)
-
-            for sh_file in ['ssl-opt.sh', 'compat.sh']:
-                sh_file = os.path.join(directory, sh_file)
-                if os.path.isfile(sh_file):
-                    self.collect_from_script(sh_file)
-
-class TestDescriptions(TestDescriptionExplorer):
-    """Collect the available test cases."""
-
-    def __init__(self):
-        super().__init__()
-        self.descriptions = set()
-
-    def process_test_case(self, _per_file_state,
-                          file_name, _line_number, description):
-        """Record an available test case."""
-        base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name))
-        key = ';'.join([base_name, description.decode('utf-8')])
-        self.descriptions.add(key)
-
-def collect_available_test_cases():
-    """Collect the available test cases."""
-    explorer = TestDescriptions()
-    explorer.walk_all()
-    return sorted(explorer.descriptions)
diff --git a/tests/scripts/outcome_analysis.py b/tests/scripts/outcome_analysis.py
deleted file mode 100644
index e2ad3e7581..0000000000
--- a/tests/scripts/outcome_analysis.py
+++ /dev/null
@@ -1,362 +0,0 @@
-"""Outcome file analysis code.
-
-This module is the bulk of the code of tests/scripts/analyze_outcomes.py
-in each consuming branch. The consuming script is expected to derive
-the classes with branch-specific customizations such as ignore lists.
-"""
-
-# Copyright The Mbed TLS Contributors
-# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
-
-import argparse
-import sys
-import traceback
-import re
-import subprocess
-import os
-import typing
-
-import collect_test_cases
-
-
-# `ComponentOutcomes` is a named tuple which is defined as:
-# ComponentOutcomes(
-#     successes = {
-#         "<suite_case>",
-#         ...
-#     },
-#     failures = {
-#         "<suite_case>",
-#         ...
-#     }
-# )
-# suite_case = "<suite>;<case>"
-ComponentOutcomes = typing.NamedTuple('ComponentOutcomes',
-                                      [('successes', typing.Set[str]),
-                                       ('failures', typing.Set[str])])
-
-# `Outcomes` is a representation of the outcomes file,
-# which is defined as:
-# Outcomes = {
-#     "<component>": ComponentOutcomes,
-#     ...
-# }
-Outcomes = typing.Dict[str, ComponentOutcomes]
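As a hedged illustration of the outcome file format that read_outcome_file (below) parses, with invented platform and component names:
```
# Each outcome line is `platform;component;suite;case;result;cause`.
# Two hypothetical input lines:
#     Linux-x86_64;component_ref;test_suite_foo;bar;PASS;
#     Linux-x86_64;component_ref;test_suite_foo;baz;FAIL;
# would be parsed into:
#     {'component_ref': ComponentOutcomes(successes={'test_suite_foo;bar'},
#                                         failures={'test_suite_foo;baz'})}
```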
-
-
-class Results:
-    """Process analysis results."""
-
-    def __init__(self):
-        self.error_count = 0
-        self.warning_count = 0
-
-    def new_section(self, fmt, *args, **kwargs):
-        self._print_line('\n*** ' + fmt + ' ***\n', *args, **kwargs)
-
-    def info(self, fmt, *args, **kwargs):
-        self._print_line('Info: ' + fmt, *args, **kwargs)
-
-    def error(self, fmt, *args, **kwargs):
-        self.error_count += 1
-        self._print_line('Error: ' + fmt, *args, **kwargs)
-
-    def warning(self, fmt, *args, **kwargs):
-        self.warning_count += 1
-        self._print_line('Warning: ' + fmt, *args, **kwargs)
-
-    @staticmethod
-    def _print_line(fmt, *args, **kwargs):
-        sys.stderr.write((fmt + '\n').format(*args, **kwargs))
-
-def execute_reference_driver_tests(results: Results, ref_component: str, driver_component: str, \
-                                   outcome_file: str) -> None:
-    """Run the tests specified in ref_component and driver_component. Results
-    are stored in the output_file and they will be used for the following
-    coverage analysis"""
-    results.new_section("Test {} and {}", ref_component, driver_component)
-
-    shell_command = "tests/scripts/all.sh --outcome-file " + outcome_file + \
-                    " " + ref_component + " " + driver_component
-    results.info("Running: {}", shell_command)
-    ret_val = subprocess.run(shell_command.split(), check=False).returncode
-
-    if ret_val != 0:
-        results.error("failed to run reference/driver components")
-
-IgnoreEntry = typing.Union[str, typing.Pattern]
-
-def name_matches_pattern(name: str, str_or_re: IgnoreEntry) -> bool:
-    """Check if name matches a pattern, that may be a string or regex.
-
-    - If the pattern is a string, name must be equal to match.
-    - If the pattern is a regex, name must fully match.
-    """
-    # The CI's python is too old for re.Pattern
-    #if isinstance(str_or_re, re.Pattern):
-    if not isinstance(str_or_re, str):
-        return str_or_re.fullmatch(name) is not None
-    else:
-        return str_or_re == name
-
-def read_outcome_file(outcome_file: str) -> Outcomes:
-    """Parse an outcome file and return an outcome collection.
-    """
-    outcomes = {}
-    with open(outcome_file, 'r', encoding='utf-8') as input_file:
-        for line in input_file:
-            (_platform, component, suite, case, result, _cause) = line.split(';')
-            # Note that `component` is not unique. If a test case passes on Linux
-            # and fails on FreeBSD, it'll end up in both the successes set and
-            # the failures set.
-            suite_case = ';'.join([suite, case])
-            if component not in outcomes:
-                outcomes[component] = ComponentOutcomes(set(), set())
-            if result == 'PASS':
-                outcomes[component].successes.add(suite_case)
-            elif result == 'FAIL':
-                outcomes[component].failures.add(suite_case)
-
-    return outcomes
-
-
-class Task:
-    """Base class for outcome analysis tasks."""
-
-    # Override the following in child classes.
-    # Map test suite names (with the test_suite_ prefix) to a list of ignored
-    # test cases. Each element in the list can be either a string or a regex;
-    # see the `name_matches_pattern` function.
-    IGNORED_TESTS = {} #type: typing.Dict[str, typing.List[IgnoreEntry]]
-
-    def __init__(self, options) -> None:
-        """Pass command line options to the tasks.
-
-        Each task decides which command line options it cares about.
-        """
-        pass
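As a hedged sketch of the ignore-list format (suite name and patterns invented), a derived task might set:
```
import re

# Keys are test suite names; values mix literal strings and compiled
# regexes, as accepted by name_matches_pattern().
IGNORED_TESTS = {
    'test_suite_example': [
        'Example case that is never executed',
        re.compile(r'Example accelerated .*'),
    ],
}
```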
-
-    def section_name(self) -> str:
-        """The section name to use in results."""
-        raise NotImplementedError
-
-    def ignored_tests(self, test_suite: str) -> typing.Iterator[IgnoreEntry]:
-        """Generate the ignore list for the specified test suite."""
-        if test_suite in self.IGNORED_TESTS:
-            yield from self.IGNORED_TESTS[test_suite]
-        pos = test_suite.find('.')
-        if pos != -1:
-            base_test_suite = test_suite[:pos]
-            if base_test_suite in self.IGNORED_TESTS:
-                yield from self.IGNORED_TESTS[base_test_suite]
-
-    def is_test_case_ignored(self, test_suite: str, test_string: str) -> bool:
-        """Check if the specified test case is ignored."""
-        for str_or_re in self.ignored_tests(test_suite):
-            if name_matches_pattern(test_string, str_or_re):
-                return True
-        return False
-
-    def run(self, results: Results, outcomes: Outcomes):
-        """Run the analysis on the specified outcomes.
-
-        Signal errors via the results object.
-        """
-        raise NotImplementedError
-
-
-class CoverageTask(Task):
-    """Analyze test coverage."""
-
-    # Test cases whose suite and description are matched by an entry in
-    # IGNORED_TESTS are expected to be never executed.
-    # All other test cases are expected to be executed at least once.
-
-    def __init__(self, options) -> None:
-        super().__init__(options)
-        self.full_coverage = options.full_coverage #type: bool
-
-    @staticmethod
-    def section_name() -> str:
-        return "Analyze coverage"
-
-    def run(self, results: Results, outcomes: Outcomes) -> None:
-        """Check that all available test cases are executed at least once."""
-        # Make sure that the generated data files are present (and up-to-date).
-        # This allows analyze_outcomes.py to run correctly on a fresh Git
-        # checkout.
-        cp = subprocess.run(['make', 'generated_files'],
-                            cwd='tests',
-                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                            check=False)
-        if cp.returncode != 0:
-            sys.stderr.write(cp.stdout.decode('utf-8'))
-            results.error("Failed \"make generated_files\" in tests. "
-                          "Coverage analysis may be incorrect.")
-        available = collect_test_cases.collect_available_test_cases()
-        for suite_case in available:
-            hit = any(suite_case in comp_outcomes.successes or
-                      suite_case in comp_outcomes.failures
-                      for comp_outcomes in outcomes.values())
-            (test_suite, test_description) = suite_case.split(';')
-            ignored = self.is_test_case_ignored(test_suite, test_description)
-
-            if not hit and not ignored:
-                if self.full_coverage:
-                    results.error('Test case not executed: {}', suite_case)
-                else:
-                    results.warning('Test case not executed: {}', suite_case)
-            elif hit and ignored:
-                # If a test case is no longer always skipped, we should remove
-                # it from the ignore list.
-                if self.full_coverage:
-                    results.error('Test case was executed but marked as ignored for coverage: {}',
-                                  suite_case)
-                else:
-                    results.warning('Test case was executed but marked as ignored for coverage: {}',
-                                    suite_case)
-
-
-class DriverVSReference(Task):
-    """Compare outcomes from testing with and without a driver.
-
-    There are 2 options to use analyze_driver_vs_reference_xxx locally:
-    1. Run tests and then analysis:
-       - tests/scripts/all.sh --outcome-file "$PWD/out.csv" <component_ref> <component_driver>
-       - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx
-    2. Let this script run both automatically:
-       - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx
-    """
-
-    # Override the following in child classes.
-    # Configuration name (all.sh component) used as the reference.
-    REFERENCE = ''
-    # Configuration name (all.sh component) used as the driver.
-    DRIVER = ''
-    # Ignored test suites (without the test_suite_ prefix).
-    IGNORED_SUITES = [] #type: typing.List[str]
-
-    def __init__(self, options) -> None:
-        super().__init__(options)
-        self.ignored_suites = frozenset('test_suite_' + x
-                                        for x in self.IGNORED_SUITES)
-
-    def section_name(self) -> str:
-        return f"Analyze driver {self.DRIVER} vs reference {self.REFERENCE}"
-
-    def run(self, results: Results, outcomes: Outcomes) -> None:
-        """Check that all tests passing in the driver component are also
-        passing in the corresponding reference component.
-        Skip:
-        - full test suites provided in ignored_suites list
-        - only some specific test inside a test suite, for which the corresponding
-          output string is provided
-        """
-        ref_outcomes = outcomes.get("component_" + self.REFERENCE)
-        driver_outcomes = outcomes.get("component_" + self.DRIVER)
-
-        if ref_outcomes is None or driver_outcomes is None:
-            results.error("required components are missing: bad outcome file?")
-            return
-
-        if not ref_outcomes.successes:
-            results.error("no passing test in reference component: bad outcome file?")
-            return
-
-        for suite_case in ref_outcomes.successes:
-            # suite_case is like "test_suite_foo.bar;Description of test case"
-            (full_test_suite, test_string) = suite_case.split(';')
-            test_suite = full_test_suite.split('.')[0] # retrieve main part of test suite name
-
-            # Immediately skip fully-ignored test suites
-            if test_suite in self.ignored_suites or \
-               full_test_suite in self.ignored_suites:
-                continue
-
-            # For ignored test cases inside test suites, just remember and:
-            # don't issue an error if they're skipped with drivers,
-            # but issue an error if they're not (means we have a bad entry).
-            ignored = self.is_test_case_ignored(full_test_suite, test_string)
-
-            if not ignored and not suite_case in driver_outcomes.successes:
-                results.error("SKIP/FAIL -> PASS: {}", suite_case)
-            if ignored and suite_case in driver_outcomes.successes:
-                results.error("uselessly ignored: {}", suite_case)
-
-
-def main(known_tasks: typing.Dict[str, typing.Type[Task]]) -> None:
-    main_results = Results()
-
-    try:
-        parser = argparse.ArgumentParser(description=__doc__)
-        parser.add_argument('outcomes', metavar='OUTCOMES.CSV',
-                            help='Outcome file to analyze')
-        parser.add_argument('specified_tasks', default='all', nargs='?',
-                            help='Analysis to be done. By default, run all tasks. '
-                                 'With one or more TASK, run only those. '
-                                 'TASK can be the name of a single task or '
-                                 'comma/space-separated list of tasks. ')
-        parser.add_argument('--list', action='store_true',
-                            help='List all available tasks and exit.')
-        parser.add_argument('--require-full-coverage', action='store_true',
-                            dest='full_coverage', help="Require all available "
-                            "test cases to be executed and issue an error "
-                            "otherwise. This flag is ignored if 'task' is "
-                            "neither 'all' nor 'analyze_coverage'")
-        options = parser.parse_args()
-
-        if options.list:
-            for task_name in known_tasks:
-                print(task_name)
-            sys.exit(0)
-
-        if options.specified_tasks == 'all':
-            tasks_list = list(known_tasks.keys())
-        else:
-            tasks_list = re.split(r'[, ]+', options.specified_tasks)
-            for task_name in tasks_list:
-                if task_name not in known_tasks:
-                    sys.stderr.write('invalid task: {}\n'.format(task_name))
-                    sys.exit(2)
-
-        # If the outcome file exists, parse it once and share the result
-        # among tasks to improve performance.
-        # Otherwise, it will be generated by execute_reference_driver_tests.
-        if not os.path.exists(options.outcomes):
-            if len(tasks_list) > 1:
-                sys.stderr.write("multiple tasks found, please provide a valid outcomes file.\n")
-                sys.exit(2)
-
-            task_name = tasks_list[0]
-            task_class = known_tasks[task_name]
-            if not issubclass(task_class, DriverVSReference):
-                sys.stderr.write("please provide a valid outcomes file for {}.\n".format(task_name))
-                sys.exit(2)
-            # mypy isn't smart enough to know that REFERENCE and DRIVER
-            # are *class* attributes of all classes derived from
-            # DriverVSReference. (It would be smart enough if we had an
-            # instance of task_class, but we can't construct an instance
-            # until we have the outcome data, so at this point we only
-            # have the class.) So we use indirection to access the class
-            # attributes.
-            execute_reference_driver_tests(main_results,
-                                           getattr(task_class, 'REFERENCE'),
-                                           getattr(task_class, 'DRIVER'),
-                                           options.outcomes)
-
-        outcomes = read_outcome_file(options.outcomes)
-
-        for task_name in tasks_list:
-            task_constructor = known_tasks[task_name]
-            task_instance = task_constructor(options)
-            main_results.new_section(task_instance.section_name())
-            task_instance.run(main_results, outcomes)
-
-        main_results.info("Overall results: {} warnings and {} errors",
-                          main_results.warning_count, main_results.error_count)
-
-        sys.exit(0 if (main_results.error_count == 0) else 1)
-
-    except Exception: # pylint: disable=broad-except
-        # Print the backtrace and exit explicitly with our chosen status.
-        traceback.print_exc()
-        sys.exit(120)

From 309051dc25c89a3724a23e002d6e7a7bde6af0d5 Mon Sep 17 00:00:00 2001
From: Gilles Peskine
Date: Mon, 23 Sep 2024 18:03:10 +0200
Subject: [PATCH 13/16] Upgrade mypy to the last version supporting Python 3.6

Upgrade mypy to 0.971, which is the last version that supports Python 3.6
(the oldest Python version that we currently run on the CI).

This fixes the error
```
framework/scripts/mbedtls_framework/outcome_analysis.py:119: error: Incompatible return value type (got "IO[Any]", expected "TextIO")
framework/scripts/mbedtls_framework/outcome_analysis.py:121: error: Incompatible return value type (got "IO[Any]", expected "TextIO")
```
As far as I can tell the fix is https://github.com/python/mypy/pull/9275
which was released in mypy 0.940.

Signed-off-by: Gilles Peskine
---
 scripts/ci.requirements.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/ci.requirements.txt b/scripts/ci.requirements.txt
index d21aa27988..1ef8abd58a 100644
--- a/scripts/ci.requirements.txt
+++ b/scripts/ci.requirements.txt
@@ -7,9 +7,9 @@
 # 2.4.4 is the version in Ubuntu 20.04. It supports Python >=3.5.
 pylint == 2.4.4
 
-# Use the earliest version of mypy that works with our code base.
-# See https://github.com/Mbed-TLS/mbedtls/pull/3953 .
-mypy >= 0.780
+# Use the last version of mypy that is compatible with Python 3.6.
+# Newer versions should be ok too.
+mypy >= 0.971
 
 # At the time of writing, only needed for tests/scripts/audit-validity-dates.py.
 # It needs >=35.0.0 for correct operation, and that requires Python >=3.6,

From e816f1ef4933769c3b8fd0f2bd4f693308ee7dce Mon Sep 17 00:00:00 2001
From: Gilles Peskine
Date: Mon, 23 Sep 2024 19:16:47 +0200
Subject: [PATCH 14/16] Downgrade mypy to a version that works with our code
 base

mypy >=0.960 rejects macro_collector.py.
https://github.com/Mbed-TLS/mbedtls-framework/issues/50

We currently need mypy >=0.940, <0.960. Pick 0.942, which works, and is
the system version on Ubuntu 22.04.

Signed-off-by: Gilles Peskine --- scripts/ci.requirements.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/ci.requirements.txt b/scripts/ci.requirements.txt index 1ef8abd58a..fc10c63b85 100644 --- a/scripts/ci.requirements.txt +++ b/scripts/ci.requirements.txt @@ -7,9 +7,13 @@ # 2.4.4 is the version in Ubuntu 20.04. It supports Python >=3.5. pylint == 2.4.4 -# Use the last version of mypy that is compatible with Python 3.6. -# Newer versions should be ok too. -mypy >= 0.971 +# Use a version of mypy that is compatible with our code base. +# mypy <0.940 is known not to work: see commit +# :/Upgrade mypy to the last version supporting Python 3.6 +# mypy >=0.960 is known not to work: +# https://github.com/Mbed-TLS/mbedtls-framework/issues/50 +# mypy 0.942 is the version in Ubuntu 22.04. +mypy == 0.942 # At the time of writing, only needed for tests/scripts/audit-validity-dates.py. # It needs >=35.0.0 for correct operation, and that requires Python >=3.6, From 5d633ff7454753dac5b58e3abb7f42d54b3aee26 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 4 Oct 2024 15:52:01 +0200 Subject: [PATCH 15/16] Default to allowing partial test coverage Currently, many test cases are not executed. A follow-up pull request will take care of that. In the meantime, continue allowing partial test coverage. Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index c40dab55e6..72dba99f7c 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -13,7 +13,9 @@ from mbedtls_framework import outcome_analysis class CoverageTask(outcome_analysis.CoverageTask): - pass # We'll populate IGNORED_TESTS soon + # We'll populate IGNORED_TESTS soon. In the meantime, lack of coverage + # is just a warning. + outcome_analysis.FULL_COVERAGE_BY_DEFAULT = False # The names that we give to classes derived from DriverVSReference do not From 0cf1bf48512e54b9807b9338564a89c4fd4b1653 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 19:13:57 +0200 Subject: [PATCH 16/16] Update framework to the branch with collect_test_cases.py and outcome_analysis.py Signed-off-by: Gilles Peskine --- framework | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/framework b/framework index 33ac133217..1de0641e78 160000 --- a/framework +++ b/framework @@ -1 +1 @@ -Subproject commit 33ac13321737c333f52659ee848ca25746588227 +Subproject commit 1de0641e789d3c38b3ce99d7922002992cbe816c