mirror of
https://github.com/Mbed-TLS/mbedtls.git
synced 2025-07-29 11:41:15 +03:00
Address potential perf regression
Ensure platforms that don't have an assembly implementation for mbedtls_get_unaligned_volatile_uint32() don't experience a performance regression. Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
This commit is contained in:
@ -50,18 +50,29 @@
|
|||||||
/*
|
/*
|
||||||
* Define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS for architectures where unaligned memory
|
* Define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS for architectures where unaligned memory
|
||||||
* accesses are known to be safe and efficient.
|
* accesses are known to be safe and efficient.
|
||||||
|
*/
|
||||||
|
#if defined(__ARM_FEATURE_UNALIGNED)
|
||||||
|
/* __ARM_FEATURE_UNALIGNED is defined by armcc, gcc 7, clang 9 and later versions */
|
||||||
|
#define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Define MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS where assembly is present to
|
||||||
|
* perform fast unaligned access to volatile data.
|
||||||
*
|
*
|
||||||
* This is needed because mbedtls_get_unaligned_uintXX etc don't support volatile
|
* This is needed because mbedtls_get_unaligned_uintXX etc don't support volatile
|
||||||
* memory accesses.
|
* memory accesses.
|
||||||
*
|
*
|
||||||
* This macro could be moved into alignment.h but for now it's only used here.
|
* Some of these definitions could be moved into alignment.h but for now they are
|
||||||
|
* only used here.
|
||||||
*/
|
*/
|
||||||
#if defined(__ARM_FEATURE_UNALIGNED)
|
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) && defined(MBEDTLS_HAVE_ASM)
|
||||||
/* __ARM_FEATURE_UNALIGNED is defined by armcc, gcc 7, clang 9 and later versions. */
|
#if defined(__arm__) || defined(__thumb__) || defined(__thumb2__) || defined(__aarch64__)
|
||||||
#define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS
|
#define MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) && defined(MBEDTLS_HAVE_ASM)
|
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS)
|
||||||
static inline uint32_t mbedtls_get_unaligned_volatile_uint32(volatile const unsigned char *p)
|
static inline uint32_t mbedtls_get_unaligned_volatile_uint32(volatile const unsigned char *p)
|
||||||
{
|
{
|
||||||
/* This is UB, even where it's safe:
|
/* This is UB, even where it's safe:
|
||||||
@ -71,21 +82,12 @@ static inline uint32_t mbedtls_get_unaligned_volatile_uint32(volatile const unsi
|
|||||||
uint32_t r;
|
uint32_t r;
|
||||||
#if defined(__arm__) || defined(__thumb__) || defined(__thumb2__)
|
#if defined(__arm__) || defined(__thumb__) || defined(__thumb2__)
|
||||||
asm ("ldr %0, [%1]" : "=r" (r) : "r" (p) :);
|
asm ("ldr %0, [%1]" : "=r" (r) : "r" (p) :);
|
||||||
return r;
|
#elif defined(__aarch64__)
|
||||||
#endif
|
|
||||||
#if defined(__aarch64__)
|
|
||||||
asm ("ldr %w0, [%1]" : "=r" (r) : "r" (p) :);
|
asm ("ldr %w0, [%1]" : "=r" (r) : "r" (p) :);
|
||||||
return r;
|
|
||||||
#endif
|
#endif
|
||||||
|
return r;
|
||||||
/* Always safe, but inefficient, fall-back */
|
|
||||||
if (MBEDTLS_IS_BIG_ENDIAN) {
|
|
||||||
return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
|
|
||||||
} else {
|
|
||||||
return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#endif /* MBEDTLS_EFFICIENT_UNALIGNED_ACCESS */
|
#endif /* MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS */
|
||||||
|
|
||||||
int mbedtls_ct_memcmp(const void *a,
|
int mbedtls_ct_memcmp(const void *a,
|
||||||
const void *b,
|
const void *b,
|
||||||
@ -96,7 +98,7 @@ int mbedtls_ct_memcmp(const void *a,
|
|||||||
volatile const unsigned char *B = (volatile const unsigned char *) b;
|
volatile const unsigned char *B = (volatile const unsigned char *) b;
|
||||||
volatile uint32_t diff = 0;
|
volatile uint32_t diff = 0;
|
||||||
|
|
||||||
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)
|
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS)
|
||||||
for (; (i + 4) <= n; i += 4) {
|
for (; (i + 4) <= n; i += 4) {
|
||||||
uint32_t x = mbedtls_get_unaligned_volatile_uint32(A + i);
|
uint32_t x = mbedtls_get_unaligned_volatile_uint32(A + i);
|
||||||
uint32_t y = mbedtls_get_unaligned_volatile_uint32(B + i);
|
uint32_t y = mbedtls_get_unaligned_volatile_uint32(B + i);
|
||||||
|
Reference in New Issue
Block a user