1
0
mirror of https://github.com/Mbed-TLS/mbedtls.git synced 2025-07-29 11:41:15 +03:00

Merge pull request #8326 from daverodgman/aesce-thumb2

Support hw-accelerated AES on Thumb and Arm
This commit is contained in:
Dave Rodgman
2023-11-27 09:58:58 +00:00
committed by GitHub
6 changed files with 219 additions and 32 deletions

View File

@ -23,9 +23,9 @@
#include "mbedtls/error.h"
#if defined(MBEDTLS_AES_USE_HARDWARE_ONLY)
#if !((defined(MBEDTLS_ARCH_IS_ARM64) && defined(MBEDTLS_AESCE_C)) || \
(defined(MBEDTLS_ARCH_IS_X64) && defined(MBEDTLS_AESNI_C)) || \
(defined(MBEDTLS_ARCH_IS_X86) && defined(MBEDTLS_AESNI_C)))
#if !((defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(MBEDTLS_AESCE_C)) || \
(defined(MBEDTLS_ARCH_IS_X64) && defined(MBEDTLS_AESNI_C)) || \
(defined(MBEDTLS_ARCH_IS_X86) && defined(MBEDTLS_AESNI_C)))
#error "MBEDTLS_AES_USE_HARDWARE_ONLY defined, but not all prerequisites"
#endif
#endif

View File

@ -5,8 +5,17 @@
* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
*/
#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRYPTO) && \
defined(__clang__) && __clang_major__ >= 4
#if defined(__clang__) && (__clang_major__ >= 4)
/* Ideally, we would simply use MBEDTLS_ARCH_IS_ARMV8_A in the following #if,
* but that is defined by build_info.h, and we need this block to happen first. */
#if defined(__ARM_ARCH)
#if __ARM_ARCH >= 8
#define MBEDTLS_AESCE_ARCH_IS_ARMV8_A
#endif
#endif
#if defined(MBEDTLS_AESCE_ARCH_IS_ARMV8_A) && !defined(__ARM_FEATURE_CRYPTO)
/* TODO: Re-consider above after https://reviews.llvm.org/D131064 merged.
*
* The intrinsic declaration are guarded by predefined ACLE macros in clang:
@ -27,6 +36,8 @@
#define MBEDTLS_ENABLE_ARM_CRYPTO_EXTENSIONS_COMPILER_FLAG
#endif
#endif /* defined(__clang__) && (__clang_major__ >= 4) */
#include <string.h>
#include "common.h"
@ -34,12 +45,14 @@
#include "aesce.h"
#if defined(MBEDTLS_ARCH_IS_ARM64)
#if defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(__ARM_NEON)
/* Compiler version checks. */
#if defined(__clang__)
# if __clang_major__ < 4
# error "Minimum version of Clang for MBEDTLS_AESCE_C is 4.0."
# if defined(MBEDTLS_ARCH_IS_ARM32) && (__clang_major__ < 11)
# error "Minimum version of Clang for MBEDTLS_AESCE_C on 32-bit Arm or Thumb is 11.0."
# elif defined(MBEDTLS_ARCH_IS_ARM64) && (__clang_major__ < 4)
# error "Minimum version of Clang for MBEDTLS_AESCE_C on aarch64 is 4.0."
# endif
#elif defined(__GNUC__)
# if __GNUC__ < 6
@ -52,6 +65,15 @@
# if _MSC_VER < 1929
# error "Minimum version of MSVC for MBEDTLS_AESCE_C is 2019 version 16.11.2."
# endif
#elif defined(__ARMCC_VERSION)
# if defined(MBEDTLS_ARCH_IS_ARM32) && (__ARMCC_VERSION < 6200002)
/* TODO: We haven't verified armclang for 32-bit Arm/Thumb prior to 6.20.
* If someone verified that, please update this and document of
* `MBEDTLS_AESCE_C` in `mbedtls_config.h`. */
# error "Minimum version of armclang for MBEDTLS_AESCE_C on 32-bit Arm is 6.20."
# elif defined(MBEDTLS_ARCH_IS_ARM64) && (__ARMCC_VERSION < 6060000)
# error "Minimum version of armclang for MBEDTLS_AESCE_C on aarch64 is 6.6."
# endif
#endif
#ifdef __ARM_NEON
@ -84,8 +106,19 @@
#if defined(__linux__) && !defined(MBEDTLS_AES_USE_HARDWARE_ONLY)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#if !defined(HWCAP_NEON)
#define HWCAP_NEON (1 << 12)
#endif
#if !defined(HWCAP2_AES)
#define HWCAP2_AES (1 << 0)
#endif
#if !defined(HWCAP_AES)
#define HWCAP_AES (1 << 3)
#endif
#if !defined(HWCAP_ASIMD)
#define HWCAP_ASIMD (1 << 1)
#endif
signed char mbedtls_aesce_has_support_result = -1;
@ -102,6 +135,16 @@ int mbedtls_aesce_has_support_impl(void)
* once, but that is harmless.
*/
if (mbedtls_aesce_has_support_result == -1) {
#if defined(MBEDTLS_ARCH_IS_ARM32)
unsigned long auxval = getauxval(AT_HWCAP);
unsigned long auxval2 = getauxval(AT_HWCAP2);
if (((auxval & HWCAP_NEON) == HWCAP_NEON) &&
((auxval2 & HWCAP2_AES) == HWCAP2_AES)) {
mbedtls_aesce_has_support_result = 1;
} else {
mbedtls_aesce_has_support_result = 0;
}
#else
unsigned long auxval = getauxval(AT_HWCAP);
if ((auxval & (HWCAP_ASIMD | HWCAP_AES)) ==
(HWCAP_ASIMD | HWCAP_AES)) {
@ -109,6 +152,7 @@ int mbedtls_aesce_has_support_impl(void)
} else {
mbedtls_aesce_has_support_result = 0;
}
#endif
}
return mbedtls_aesce_has_support_result;
}
@ -362,24 +406,91 @@ int mbedtls_aesce_setkey_enc(unsigned char *rk,
#if defined(MBEDTLS_GCM_C)
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 5
/* Some intrinsics are not available for GCC 5.X. */
#define vreinterpretq_p64_u8(a) ((poly64x2_t) a)
#define vreinterpretq_u8_p128(a) ((uint8x16_t) a)
static inline poly64_t vget_low_p64(poly64x2_t __a)
#if defined(MBEDTLS_ARCH_IS_ARM32)
#if defined(__clang__)
/* On clang for A32/T32, work around some missing intrinsics and types which are listed in
* [ACLE](https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#polynomial-1)
* These are only required for GCM.
*/
#define vreinterpretq_u64_p64(a) ((uint64x2_t) a)
typedef uint8x16_t poly128_t;
static inline poly128_t vmull_p64(poly64_t a, poly64_t b)
{
uint64x2_t tmp = (uint64x2_t) (__a);
uint64x1_t lo = vcreate_u64(vgetq_lane_u64(tmp, 0));
return (poly64_t) (lo);
poly128_t r;
asm ("vmull.p64 %[r], %[a], %[b]" : [r] "=w" (r) : [a] "w" (a), [b] "w" (b) :);
return r;
}
#endif /* !__clang__ && __GNUC__ && __GNUC__ == 5*/
/* This is set to cause some more missing intrinsics to be defined below */
#define COMMON_MISSING_INTRINSICS
static inline poly128_t vmull_high_p64(poly64x2_t a, poly64x2_t b)
{
return vmull_p64((poly64_t) (vget_high_u64((uint64x2_t) a)),
(poly64_t) (vget_high_u64((uint64x2_t) b)));
}
#endif /* defined(__clang__) */
static inline uint8x16_t vrbitq_u8(uint8x16_t x)
{
/* There is no vrbitq_u8 instruction in A32/T32, so provide
* an equivalent non-Neon implementation. Reverse bit order in each
* byte with 4x rbit, rev. */
asm ("ldm %[p], { r2-r5 } \n\t"
"rbit r2, r2 \n\t"
"rev r2, r2 \n\t"
"rbit r3, r3 \n\t"
"rev r3, r3 \n\t"
"rbit r4, r4 \n\t"
"rev r4, r4 \n\t"
"rbit r5, r5 \n\t"
"rev r5, r5 \n\t"
"stm %[p], { r2-r5 } \n\t"
:
/* Output: 16 bytes of memory pointed to by &x */
"+m" (*(uint8_t(*)[16]) &x)
:
[p] "r" (&x)
:
"r2", "r3", "r4", "r5"
);
return x;
}
#endif /* defined(MBEDTLS_ARCH_IS_ARM32) */
#if defined(MBEDTLS_COMPILER_IS_GCC) && __GNUC__ == 5
/* Some intrinsics are not available for GCC 5.X. */
#define COMMON_MISSING_INTRINSICS
#endif /* MBEDTLS_COMPILER_IS_GCC && __GNUC__ == 5 */
#if defined(COMMON_MISSING_INTRINSICS)
/* Missing intrinsics common to both GCC 5, and Clang on 32-bit */
#define vreinterpretq_p64_u8(a) ((poly64x2_t) a)
#define vreinterpretq_u8_p128(a) ((uint8x16_t) a)
static inline poly64x1_t vget_low_p64(poly64x2_t a)
{
uint64x1_t r = vget_low_u64(vreinterpretq_u64_p64(a));
return (poly64x1_t) r;
}
#endif /* COMMON_MISSING_INTRINSICS */
/* vmull_p64/vmull_high_p64 wrappers.
*
* Older compilers miss some intrinsic functions for `poly*_t`. We use
* uint8x16_t and uint8x16x3_t as input/output parameters.
*/
#if defined(__GNUC__) && !defined(__clang__)
#if defined(MBEDTLS_COMPILER_IS_GCC)
/* GCC reports incompatible type error without cast. GCC think poly64_t and
* poly64x1_t are different, that is different with MSVC and Clang. */
#define MBEDTLS_VMULL_P64(a, b) vmull_p64((poly64_t) a, (poly64_t) b)
@ -388,7 +499,8 @@ static inline poly64_t vget_low_p64(poly64x2_t __a)
* error with/without cast. And I think poly64_t and poly64x1_t are same, no
* cast for clang also. */
#define MBEDTLS_VMULL_P64(a, b) vmull_p64(a, b)
#endif
#endif /* MBEDTLS_COMPILER_IS_GCC */
static inline uint8x16_t pmull_low(uint8x16_t a, uint8x16_t b)
{
@ -464,7 +576,7 @@ static inline uint8x16_t poly_mult_reduce(uint8x16x3_t input)
/* use 'asm' as an optimisation barrier to prevent loading MODULO from
* memory. It is for GNUC compatible compilers.
*/
asm ("" : "+w" (r));
asm volatile ("" : "+w" (r));
#endif
uint8x16_t const MODULO = vreinterpretq_u8_u64(vshrq_n_u64(r, 64 - 8));
uint8x16_t h, m, l; /* input high/middle/low 128b */
@ -507,6 +619,6 @@ void mbedtls_aesce_gcm_mult(unsigned char c[16],
#undef MBEDTLS_POP_TARGET_PRAGMA
#endif
#endif /* MBEDTLS_ARCH_IS_ARM64 */
#endif /* MBEDTLS_ARCH_IS_ARMV8_A */
#endif /* MBEDTLS_AESCE_C */

View File

@ -2,7 +2,7 @@
* \file aesce.h
*
* \brief Support hardware AES acceleration on Armv8-A processors with
* the Armv8-A Cryptographic Extension in AArch64 execution state.
* the Armv8-A Cryptographic Extension.
*
* \warning These functions are only for internal use by other library
* functions; you must not call them directly.
@ -19,7 +19,7 @@
#include "mbedtls/aes.h"
#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_ARCH_IS_ARM64)
#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(__ARM_NEON)
#define MBEDTLS_AESCE_HAVE_CODE
@ -118,6 +118,12 @@ int mbedtls_aesce_setkey_enc(unsigned char *rk,
}
#endif
#endif /* MBEDTLS_AESCE_C && MBEDTLS_ARCH_IS_ARM64 */
#else
#if defined(MBEDTLS_AES_USE_HARDWARE_ONLY) && defined(MBEDTLS_ARCH_IS_ARMV8_A)
#error "AES hardware acceleration not supported on this platform"
#endif
#endif /* MBEDTLS_AESCE_C && MBEDTLS_ARCH_IS_ARMV8_A && __ARM_NEON */
#endif /* MBEDTLS_AESCE_H */