From 058faeadac1d5134ff5899672ecd43b023960945 Mon Sep 17 00:00:00 2001 From: Brad Warren Date: Mon, 23 Aug 2021 10:57:34 -0700 Subject: [PATCH] Propagate requirement that ACME responses are UTF-8 (#9001) I think this fixes https://github.com/certbot/certbot/issues/8968. The only other calls with `requests` we make in our code outside of our tests that I could find are: 1. [Here](https://github.com/certbot/certbot/blob/a8a8a39ff18ee33936d385b5ac403daa81b7ed31/certbot/certbot/_internal/eff.py#L91) where we assume the response is JSON and I think [requests behavior](https://github.com/psf/requests/blob/db575eeedcfdb03bf31285afd3033e301df8b685/requests/models.py#L891-L896) is sane. 2. [Here](https://github.com/certbot/certbot/blob/a8a8a39ff18ee33936d385b5ac403daa81b7ed31/certbot/certbot/ocsp.py#L190) where we know the response contains binary data. I think this is a pretty minor change because we were already assuming the response was UTF-8 in the code here when logging it which I think is a valid assumption because the spec says that [all content should be UTF-8 encoded](https://datatracker.ietf.org/doc/html/rfc8555#section-5). I added the check for the `Accept` header due to the text [here](https://datatracker.ietf.org/doc/html/rfc8555#section-7.4.2) saying that it can be used to request the certificate in an alternate format such as DER. We currently set the Accept header in our own ACMEv1 client code before downloading the DER certificate, but this isn't required according to [the closest thing I think we have to an ACMEv1 spec](https://github.com/letsencrypt/boulder/blob/f1894f8d1d91c250d8193be02cb08b22d849f641/docs/acme-divergences-v1.md#section-742) so I left the content type check with a comment that it can be removed in the future. * Revert "add chardet dep (#8965)" This reverts commit 1129d850d3257deae1c5de5b35c35c873d1fca7c. * set response.encoding in acme * more docs --- acme/acme/client.py | 18 ++++++++++++++---- acme/setup.py | 4 ---- certbot/CHANGELOG.md | 3 +++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/acme/acme/client.py b/acme/acme/client.py index 28ed4f5bb..93816abfb 100644 --- a/acme/acme/client.py +++ b/acme/acme/client.py @@ -1149,13 +1149,23 @@ class ClientNetwork: host, path, _err_no, err_msg = m.groups() raise ValueError("Requesting {0}{1}:{2}".format(host, path, err_msg)) - # If content is DER, log the base64 of it instead of raw bytes, to keep - # binary data out of the logs. + # If the Content-Type is DER or an Accept header was sent in the + # request, the response may not be UTF-8 encoded. In this case, we + # don't set response.encoding and log the base64 response instead of + # raw bytes to keep binary data out of the logs. This code can be + # simplified to only check for an Accept header in the request when + # ACMEv1 support is dropped. debug_content: Union[bytes, str] - if response.headers.get("Content-Type") == DER_CONTENT_TYPE: + if (response.headers.get("Content-Type") == DER_CONTENT_TYPE or + "Accept" in kwargs["headers"]): debug_content = base64.b64encode(response.content) else: - debug_content = response.content.decode("utf-8") + # We set response.encoding so response.text knows the response is + # UTF-8 encoded instead of trying to guess the encoding that was + # used which is error prone. This setting affects all future + # accesses of .text made on the returned response object as well. + response.encoding = "utf-8" + debug_content = response.text logger.debug('Received response:\nHTTP %d\n%s\n\n%s', response.status_code, "\n".join("{0}: {1}".format(k, v) diff --git a/acme/setup.py b/acme/setup.py index a38a64efb..baed93c65 100644 --- a/acme/setup.py +++ b/acme/setup.py @@ -6,10 +6,6 @@ from setuptools import setup version = '1.19.0.dev0' install_requires = [ - # This dependency just exists to ensure that chardet is installed along - # with requests so it will use it instead of charset_normalizer. See - # https://github.com/certbot/certbot/issues/8964 for more info. - 'chardet', 'cryptography>=2.1.4', # formerly known as acme.jose: # 1.1.0+ is required to avoid the warnings described at diff --git a/certbot/CHANGELOG.md b/certbot/CHANGELOG.md index 88e5e84b4..5e86b470e 100644 --- a/certbot/CHANGELOG.md +++ b/certbot/CHANGELOG.md @@ -18,6 +18,9 @@ Certbot adheres to [Semantic Versioning](https://semver.org/). * `zope` based interfaces in `certbot.interfaces` module are deprecated and will be removed in a future release of Certbot. Any import of these interfaces will emit a warning to prepare the transition for developers. +* We removed the dependency on `chardet` from our acme library. Except for when + downloading a certificate in an alternate format, our acme library now + assumes all server responses are UTF-8 encoded which is required by RFC 8555. ### Fixed