mirror of
https://github.com/esp8266/Arduino.git
synced 2025-07-16 00:43:00 +03:00
Update mmu_get... and mmu_set... (#8290)
These changes are needed to address bugs that can emerge with the improved optimization from the GCC 10.3 compiler. Updated performance inline functions `mmu_get_uint8()`, ... and `mmu_set_uint8()`, ... to comply with strict-aliasing rules. Without this change, stale data may be referenced. This issue was revealed in discussions on https://github.com/esp8266/Arduino/issues/8261#issue-963529268 Changes to avoid over-optimization of 32-bit wide transfers from IRAM, turning into 8-bit or 16-bit transfers by the new GCC 10.3 compiler. This has been a reoccurring/tricky problem for me with the new compiler. So far referencing the 32-bit value loaded by way of an Extended ASM R/W output register has stopped the compiler from optimizing down to an 8-bit or 16-bit transfer. Example: ```cpp uint32_t val; __builtin_memcpy(&val, v32, sizeof(uint32_t)); asm volatile ("" :"+r"(val)); // inject 32-bit dependency ... ``` Updated example `irammem.ino` * do a simple test of compliance to strict-aliasing rules * For `mmu_get_uint8()`, added tests to evaluate if 32-bit wide transfers were converted to an 8-bit transfer.
This commit is contained in:
@ -26,13 +26,24 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//C This turns on range checking. Is this the value you want to trigger it?
|
||||
// This turns on range checking.
|
||||
#ifdef DEBUG_ESP_CORE
|
||||
#define DEBUG_ESP_MMU
|
||||
#endif
|
||||
|
||||
#if defined(CORE_MOCK)
|
||||
#define ets_uart_printf(...) do {} while(false)
|
||||
#define XCHAL_INSTRAM0_VADDR 0x40000000
|
||||
#define XCHAL_INSTRAM1_VADDR 0x40100000
|
||||
#define XCHAL_INSTROM0_VADDR 0x40200000
|
||||
#else
|
||||
#include <sys/config.h> // For config/core-isa.h
|
||||
/*
|
||||
Cautiously use XCHAL_..._VADDR values where possible.
|
||||
While XCHAL_..._VADDR values in core-isa.h may define the Xtensa processor
|
||||
CONFIG options, they are not always an indication of DRAM, IRAM, or ROM
|
||||
size or position in the address space.
|
||||
*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -71,32 +82,34 @@ DBG_MMU_FLUSH(0)
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool mmu_is_iram(const void *addr) {
|
||||
#define IRAM_START 0x40100000UL
|
||||
const uintptr_t iram_start = (uintptr_t)XCHAL_INSTRAM1_VADDR;
|
||||
#ifndef MMU_IRAM_SIZE
|
||||
#if defined(__GNUC__) && !defined(CORE_MOCK)
|
||||
#warning "MMU_IRAM_SIZE was undefined, setting to 0x8000UL!"
|
||||
#endif
|
||||
#define MMU_IRAM_SIZE 0x8000UL
|
||||
#define MMU_IRAM_SIZE 0x8000ul
|
||||
#endif
|
||||
#define IRAM_END (IRAM_START + MMU_IRAM_SIZE)
|
||||
const uintptr_t iram_end = iram_start + MMU_IRAM_SIZE;
|
||||
|
||||
return (IRAM_START <= (uintptr_t)addr && IRAM_END > (uintptr_t)addr);
|
||||
return (iram_start <= (uintptr_t)addr && iram_end > (uintptr_t)addr);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool mmu_is_dram(const void *addr) {
|
||||
#define DRAM_START 0x3FF80000UL
|
||||
#define DRAM_END 0x40000000UL
|
||||
const uintptr_t dram_start = 0x3FFE8000ul;
|
||||
// The start of the Boot ROM sits at the end of DRAM. 0x40000000ul;
|
||||
const uintptr_t dram_end = (uintptr_t)XCHAL_INSTRAM0_VADDR;
|
||||
|
||||
return (DRAM_START <= (uintptr_t)addr && DRAM_END > (uintptr_t)addr);
|
||||
return (dram_start <= (uintptr_t)addr && dram_end > (uintptr_t)addr);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool mmu_is_icache(const void *addr) {
|
||||
#define ICACHE_START 0x40200000UL
|
||||
#define ICACHE_END (ICACHE_START + 0x100000UL)
|
||||
extern void _irom0_text_end(void);
|
||||
const uintptr_t icache_start = (uintptr_t)XCHAL_INSTROM0_VADDR;
|
||||
const uintptr_t icache_end = (uintptr_t)_irom0_text_end;
|
||||
|
||||
return (ICACHE_START <= (uintptr_t)addr && ICACHE_END > (uintptr_t)addr);
|
||||
return (icache_start <= (uintptr_t)addr && icache_end > (uintptr_t)addr);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_ESP_MMU
|
||||
@ -127,8 +140,26 @@ bool mmu_is_icache(const void *addr) {
|
||||
static inline __attribute__((always_inline))
|
||||
uint8_t mmu_get_uint8(const void *p8) {
|
||||
ASSERT_RANGE_TEST_READ(p8);
|
||||
uint32_t val = (*(uint32_t *)((uintptr_t)p8 & ~0x3));
|
||||
uint32_t pos = ((uintptr_t)p8 & 0x3) * 8;
|
||||
// https://gist.github.com/shafik/848ae25ee209f698763cffee272a58f8#how-do-we-type-pun-correctly
|
||||
// Comply with strict-aliasing rules. Using memcpy is a Standards suggested
|
||||
// method for type punning. The compiler optimizer will replace the memcpy
|
||||
// with an `l32i` instruction. Using __builtin_memcpy to ensure we get the
|
||||
// effects of the compiler optimization and not some #define version of
|
||||
// memcpy.
|
||||
void *v32 = (void *)((uintptr_t)p8 & ~(uintptr_t)3u);
|
||||
uint32_t val;
|
||||
__builtin_memcpy(&val, v32, sizeof(uint32_t));
|
||||
// Use an empty ASM to reference the 32-bit value. This will block the
|
||||
// compiler from immediately optimizing to an 8-bit or 16-bit load instruction
|
||||
// against IRAM memory. (This approach was inspired by
|
||||
// https://github.com/esp8266/Arduino/pull/7780#discussion_r548303374)
|
||||
// This issue was seen when using a constant address with the GCC 10.3
|
||||
// compiler.
|
||||
// As a general practice, I think referencing by way of Extended ASM R/W
|
||||
// output register will stop the the compiler from reloading the value later
|
||||
// as 8-bit load from IRAM.
|
||||
asm volatile ("" :"+r"(val)); // inject 32-bit dependency
|
||||
uint32_t pos = ((uintptr_t)p8 & 3u) * 8u;
|
||||
val >>= pos;
|
||||
return (uint8_t)val;
|
||||
}
|
||||
@ -136,8 +167,11 @@ uint8_t mmu_get_uint8(const void *p8) {
|
||||
static inline __attribute__((always_inline))
|
||||
uint16_t mmu_get_uint16(const uint16_t *p16) {
|
||||
ASSERT_RANGE_TEST_READ(p16);
|
||||
uint32_t val = (*(uint32_t *)((uintptr_t)p16 & ~0x3));
|
||||
uint32_t pos = ((uintptr_t)p16 & 0x3) * 8;
|
||||
void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)0x3u);
|
||||
uint32_t val;
|
||||
__builtin_memcpy(&val, v32, sizeof(uint32_t));
|
||||
asm volatile ("" :"+r"(val));
|
||||
uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
|
||||
val >>= pos;
|
||||
return (uint16_t)val;
|
||||
}
|
||||
@ -145,8 +179,11 @@ uint16_t mmu_get_uint16(const uint16_t *p16) {
|
||||
static inline __attribute__((always_inline))
|
||||
int16_t mmu_get_int16(const int16_t *p16) {
|
||||
ASSERT_RANGE_TEST_READ(p16);
|
||||
uint32_t val = (*(uint32_t *)((uintptr_t)p16 & ~0x3));
|
||||
uint32_t pos = ((uintptr_t)p16 & 0x3) * 8;
|
||||
void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u);
|
||||
uint32_t val;
|
||||
__builtin_memcpy(&val, v32, sizeof(uint32_t));
|
||||
asm volatile ("" :"+r"(val));
|
||||
uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
|
||||
val >>= pos;
|
||||
return (int16_t)val;
|
||||
}
|
||||
@ -154,30 +191,43 @@ int16_t mmu_get_int16(const int16_t *p16) {
|
||||
static inline __attribute__((always_inline))
|
||||
uint8_t mmu_set_uint8(void *p8, const uint8_t val) {
|
||||
ASSERT_RANGE_TEST_WRITE(p8);
|
||||
uint32_t pos = ((uintptr_t)p8 & 0x3) * 8;
|
||||
uint32_t pos = ((uintptr_t)p8 & 3u) * 8u;
|
||||
uint32_t sval = val << pos;
|
||||
uint32_t valmask = 0x0FF << pos;
|
||||
uint32_t valmask = 0x0FFu << pos;
|
||||
|
||||
void *v32 = (void *)((uintptr_t)p8 & ~(uintptr_t)3u);
|
||||
uint32_t ival;
|
||||
__builtin_memcpy(&ival, v32, sizeof(uint32_t));
|
||||
asm volatile ("" :"+r"(ival));
|
||||
|
||||
uint32_t *p32 = (uint32_t *)((uintptr_t)p8 & ~0x3);
|
||||
uint32_t ival = *p32;
|
||||
ival &= (~valmask);
|
||||
ival |= sval;
|
||||
*p32 = ival;
|
||||
/*
|
||||
This 32-bit dependency injection does not appear to be needed with the
|
||||
current GCC 10.3; however, that could change in the future versions. Or, I
|
||||
may not have the right test for it to fail.
|
||||
*/
|
||||
asm volatile ("" :"+r"(ival));
|
||||
__builtin_memcpy(v32, &ival, sizeof(uint32_t));
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
uint16_t mmu_set_uint16(uint16_t *p16, const uint16_t val) {
|
||||
ASSERT_RANGE_TEST_WRITE(p16);
|
||||
uint32_t pos = ((uintptr_t)p16 & 0x3) * 8;
|
||||
uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
|
||||
uint32_t sval = val << pos;
|
||||
uint32_t valmask = 0x0FFFF << pos;
|
||||
uint32_t valmask = 0x0FFFFu << pos;
|
||||
|
||||
void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u);
|
||||
uint32_t ival;
|
||||
__builtin_memcpy(&ival, v32, sizeof(uint32_t));
|
||||
asm volatile ("" :"+r"(ival));
|
||||
|
||||
uint32_t *p32 = (uint32_t *)((uintptr_t)p16 & ~0x3);
|
||||
uint32_t ival = *p32;
|
||||
ival &= (~valmask);
|
||||
ival |= sval;
|
||||
*p32 = ival;
|
||||
asm volatile ("" :"+r"(ival));
|
||||
__builtin_memcpy(v32, &ival, sizeof(uint32_t));
|
||||
return val;
|
||||
}
|
||||
|
||||
@ -185,32 +235,36 @@ static inline __attribute__((always_inline))
|
||||
int16_t mmu_set_int16(int16_t *p16, const int16_t val) {
|
||||
ASSERT_RANGE_TEST_WRITE(p16);
|
||||
uint32_t sval = (uint16_t)val;
|
||||
uint32_t pos = ((uintptr_t)p16 & 0x3) * 8;
|
||||
uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
|
||||
sval <<= pos;
|
||||
uint32_t valmask = 0x0FFFF << pos;
|
||||
uint32_t valmask = 0x0FFFFu << pos;
|
||||
|
||||
void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u);
|
||||
uint32_t ival;
|
||||
__builtin_memcpy(&ival, v32, sizeof(uint32_t));
|
||||
asm volatile ("" :"+r"(ival));
|
||||
|
||||
uint32_t *p32 = (uint32_t *)((uintptr_t)p16 & ~0x3);
|
||||
uint32_t ival = *p32;
|
||||
ival &= (~valmask);
|
||||
ival |= sval;
|
||||
*p32 = ival;
|
||||
asm volatile ("" :"+r"(ival));
|
||||
__builtin_memcpy(v32, &ival, sizeof(uint32_t));
|
||||
return val;
|
||||
}
|
||||
|
||||
#if (MMU_IRAM_SIZE > 32*1024) && !defined(MMU_SEC_HEAP)
|
||||
extern void _text_end(void);
|
||||
#define MMU_SEC_HEAP mmu_sec_heap()
|
||||
#define MMU_SEC_HEAP_SIZE mmu_sec_heap_size()
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void *mmu_sec_heap(void) {
|
||||
uint32_t sec_heap = (uint32_t)_text_end + 32;
|
||||
return (void *)(sec_heap &= ~7);
|
||||
extern void _text_end(void);
|
||||
uintptr_t sec_heap = (uintptr_t)_text_end + (uintptr_t)32u;
|
||||
return (void *)(sec_heap &= ~(uintptr_t)7u);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
size_t mmu_sec_heap_size(void) {
|
||||
return (size_t)0xC000UL - ((size_t)mmu_sec_heap() - 0x40100000UL);
|
||||
return (size_t)0xC000ul - ((uintptr_t)mmu_sec_heap() - (uintptr_t)XCHAL_INSTRAM1_VADDR);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
Reference in New Issue
Block a user