Fix fullchain parsing for CRLF chains (#7877)

Fixes #7875 . After [this comment](https://github.com/certbot/certbot/issues/7875#issuecomment-608145208) and evaluating the options, I opted to go with `stricttextualmsg`, as required by RFC 8555. Reasoning is that the ACME v1 code path (via OpenSSL) produces a `fullchain_pem` which satisfies `stricttextualmsg`, so we don't need to be more generous than that. One downside of the `re` approach is that it doesn't seem capable of capturing repeating group matches. As a result, it matches each certificate individually, silently passing over any data in between the encapsulation boundaries, such as explanatory text, which is prohibited by RFC 8555. It would be ideal to raise an error when encountering such a non-conformant chain, but we'd need to create a mini-parser to do it, I think. * Fix fullchain parsing for CRLF chains. fullchain parsing now works in two passes: 1. A first pass which is generous with what it accepts - basically preeb(CERTIFICATE)+anything+posteb(CERTIFICATE). This determines the boundaries for each certificate. 2. A second pass which normalizes (by parsing and re-encoding) each certificate found in the first pass. * typo in docstring * remove redundant group in regex * can't use assertRaisesRegex until py27 is gone
2026-01-26 07:41:33 +03:00 · 2020-04-08 09:19:13 +10:00
parent 5992d521e2
commit e9895d2ec6
3 changed files with 48 additions and 5 deletions
--- a/certbot/CHANGELOG.md
+++ b/certbot/CHANGELOG.md
@@ -28,6 +28,8 @@ Certbot adheres to [Semantic Versioning](https://semver.org/).
 * Fix hanging OCSP queries during revocation checking - added a 10 second timeout.
 * Standalone servers now have a default socket timeout of 30 seconds, fixing
  cases where an idle connection can cause the standalone plugin to hang.
+* Parsing of the RFC 8555 application/pem-certificate-chain now tolerates CRLF line
+  endings. This should fix interoperability with Buypass' services.

 More details about these changes can be found on our GitHub repo.

--- a/certbot/certbot/crypto_util.py
+++ b/certbot/certbot/crypto_util.py
@@ -8,6 +8,7 @@ import hashlib
 import logging
 import warnings

+import re
 # See https://github.com/pyca/cryptography/issues/4275
 from cryptography import x509  # type: ignore
 from cryptography.exceptions import InvalidSignature
@@ -478,6 +479,17 @@ def sha256sum(filename):
        sha256.update(file_d.read().encode('UTF-8'))
    return sha256.hexdigest()

+# Finds one CERTIFICATE stricttextualmsg according to rfc7468#section-3.
+# Does not validate the base64text - use crypto.load_certificate.
+CERT_PEM_REGEX = re.compile(
+    b"""-----BEGIN CERTIFICATE-----\r?
+.+?\r?
+-----END CERTIFICATE-----\r?
+""",
+    re.DOTALL # DOTALL (/s) because the base64text may include newlines
+)
+
+
 def cert_and_chain_from_fullchain(fullchain_pem):
    """Split fullchain_pem into cert_pem and chain_pem

@@ -486,11 +498,24 @@ def cert_and_chain_from_fullchain(fullchain_pem):
    :returns: tuple of string cert_pem and chain_pem
    :rtype: tuple

+    :raises errors.Error: If there are less than 2 certificates in the chain.
+
    """
-    cert = crypto.dump_certificate(crypto.FILETYPE_PEM,
-        crypto.load_certificate(crypto.FILETYPE_PEM, fullchain_pem)).decode()
-    chain = fullchain_pem[len(cert):].lstrip()
-    return (cert, chain)
+    # First pass: find the boundary of each certificate in the chain.
+    # TODO: This will silently skip over any "explanatory text" in between boundaries,
+    # which is prohibited by RFC8555.
+    certs = CERT_PEM_REGEX.findall(fullchain_pem.encode())
+    if len(certs) < 2:
+        raise errors.Error("failed to parse fullchain into cert and chain: " +
+                           "less than 2 certificates in chain")
+
+    # Second pass: for each certificate found, parse it using OpenSSL and re-encode it,
+    # with the effect of normalizing any encoding variations (e.g. CRLF, whitespace).
+    certs_normalized = [crypto.dump_certificate(crypto.FILETYPE_PEM,
+        crypto.load_certificate(crypto.FILETYPE_PEM, cert)).decode() for cert in certs]
+
+    # Since each normalized cert has a newline suffix, no extra newlines are required.
+    return (certs_normalized[0], "".join(certs_normalized[1:]))

 def get_serial_from_cert(cert_path):
    """Retrieve the serial number of a certificate from certificate path
--- a/certbot/tests/crypto_util_test.py
+++ b/certbot/tests/crypto_util_test.py
@@ -376,17 +376,33 @@ class Sha256sumTest(unittest.TestCase):
 class CertAndChainFromFullchainTest(unittest.TestCase):
    """Tests for certbot.crypto_util.cert_and_chain_from_fullchain"""

+    def _parse_and_reencode_pem(self, cert_pem):
+        from OpenSSL import crypto
+        return crypto.dump_certificate(crypto.FILETYPE_PEM,
+            crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem)).decode()
+
    def test_cert_and_chain_from_fullchain(self):
        cert_pem = CERT.decode()
        chain_pem = cert_pem + SS_CERT.decode()
        fullchain_pem = cert_pem + chain_pem
        spacey_fullchain_pem = cert_pem + u'\n' + chain_pem
+        crlf_fullchain_pem = fullchain_pem.replace(u'\n', u'\r\n')
+
+        # In the ACME v1 code path, the fullchain is constructed by loading cert+chain DERs
+        # and using OpenSSL to dump them, so here we confirm that OpenSSL is producing certs
+        # that will be parseable by cert_and_chain_from_fullchain.
+        acmev1_fullchain_pem = self._parse_and_reencode_pem(cert_pem) + \
+            self._parse_and_reencode_pem(cert_pem) + self._parse_and_reencode_pem(SS_CERT.decode())
+
        from certbot.crypto_util import cert_and_chain_from_fullchain
-        for fullchain in (fullchain_pem, spacey_fullchain_pem):
+        for fullchain in (fullchain_pem, spacey_fullchain_pem, crlf_fullchain_pem,
+                          acmev1_fullchain_pem):
            cert_out, chain_out = cert_and_chain_from_fullchain(fullchain)
            self.assertEqual(cert_out, cert_pem)
            self.assertEqual(chain_out, chain_pem)

+        self.assertRaises(errors.Error, cert_and_chain_from_fullchain, cert_pem)
+

 if __name__ == '__main__':
    unittest.main()  # pragma: no cover