mirror of
				https://github.com/esp8266/Arduino.git
				synced 2025-10-24 07:13:45 +03:00 
			
		
		
		
	Extended ASM got fussy when using different optimizations. ie. HWDT (#7816)
resets. It seems you should not use input registers for scratch registers. Add an extra output register instead. No code size increase. Light refactoring for readability Added "C" reference code for Extended ASM Save two cycles by loading a0 early in exc-c-wrapper-handler.S Use optimization O2 Net change in size, 0 bytes with optimization. Save 4 bytes w/o Optimization. With changes and "O2" save 3 cycles on write and 6 cycles on read.
This commit is contained in:
		| @@ -18,10 +18,10 @@ | ||||
|  */ | ||||
|  | ||||
| /* | ||||
|  * This exception handler, allows for byte or short accesses to iRAM or PROGMEM | ||||
|  * to succeed without causing a crash. It is still preferred to use the xxx_P | ||||
|  * macros whenever possible, since they are probably 30x faster than this | ||||
|  * exception handler method. | ||||
|  * This exception handler handles EXCCAUSE_LOAD_STORE_ERROR. It allows for a | ||||
|  * byte or short access to iRAM or PROGMEM to succeed without causing a crash. | ||||
|  * When reading, it is still preferred to use the xxx_P macros when possible | ||||
|  * since they are probably 30x faster than this exception handler method. | ||||
|  * | ||||
|  * Code taken directly from @pvvx's public domain code in | ||||
|  * https://github.com/pvvx/esp8266web/blob/master/app/sdklib/system/app_main.c | ||||
| @@ -37,6 +37,16 @@ | ||||
| #include <Schedule.h> | ||||
| #include <debug.h> | ||||
|  | ||||
| // All of these optimization were tried and now work | ||||
| // These results were from irammem.ino using GCC 10.2 | ||||
| // DRAM reference                    uint16    9 AVG cycles/transfer | ||||
| // #pragma GCC optimize("O0")     // uint16, 289 AVG cycles/transfer, IRAM: +180 | ||||
| // #pragma GCC optimize("O1")     // uint16, 241 AVG cycles/transfer, IRAM: +16 | ||||
| #pragma GCC optimize("O2")     // uint16, 230 AVG cycles/transfer, IRAM: +4 | ||||
| // #pragma GCC optimize("O3")     // uint16, 230 AVG cycles/transfer, IRAM: +4 | ||||
| // #pragma GCC optimize("Ofast")  // uint16, 230 AVG cycles/transfer, IRAM: +4 | ||||
| // #pragma GCC optimize("Os")     // uint16, 233 AVG cycles/transfer, IRAM: 27556  +0 | ||||
|  | ||||
| extern "C" { | ||||
|  | ||||
| #define LOAD_MASK   0x00f00fu | ||||
| @@ -50,32 +60,55 @@ extern "C" { | ||||
|  | ||||
| static fn_c_exception_handler_t old_c_handler = NULL; | ||||
|  | ||||
| static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef, int cause) | ||||
| static | ||||
| IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef, int cause) | ||||
| { | ||||
|   do { | ||||
|     /* | ||||
|        Had to split out some of the asm, compiler was reusing a register that it | ||||
|        needed later. A crash would come or go away with the slightest unrelated | ||||
|        changes elsewhere in the function. | ||||
|        In adapting the public domain version, a crash would come or go away with | ||||
|        the slightest unrelated changes elsewhere in the function. Observed that | ||||
|        register a15 was used for epc1, then clobbered by `rsr.` I now believe a | ||||
|        "&" on the output register would have resolved the problem. | ||||
|  | ||||
|        Register a15 was used for epc1, then clobbered for rsr. Maybe an | ||||
|        __asm("":::"memory") before starting the asm would help for these cases. | ||||
|        For this instance moved setting epc1 closer to where it was used. | ||||
|        Edit. "&" on output register would have resolved the problem. | ||||
|        Refactored to reduce and consolidate register usage. | ||||
|        However, I have refactored the Extended ASM to reduce and consolidate | ||||
|        register usage and corrected the issue. | ||||
|  | ||||
|        The positioning of the Extended ASM block (as early as possible in the | ||||
|        compiled function) is in part controlled by the immediate need for | ||||
|        output variable `insn`. This placement aids in getting excvaddr read as | ||||
|        early as possible. | ||||
|      */ | ||||
|     uint32_t insn; | ||||
|     __asm( | ||||
|       "movi  %0, ~3\n\t"         /* prepare a mask for the EPC */ | ||||
|       "and   %0, %0, %1\n\t"     /* apply mask for 32bit aligned base */ | ||||
|       "ssa8l %1\n\t"             /* set up shift register for src op */ | ||||
|       "l32i  %1, %0, 0\n\t"      /* load part 1 */ | ||||
|       "l32i  %0, %0, 4\n\t"      /* load part 2 */ | ||||
|       "src   %0, %0, %1\n\t"     /* right shift to get faulting instruction */ | ||||
|       :"=&r"(insn) | ||||
|       :"r"(ef->epc) | ||||
|       : | ||||
|     ); | ||||
|     uint32_t insn, excvaddr; | ||||
| #if 1 | ||||
|     { | ||||
|       uint32_t tmp; | ||||
|       __asm__ ( | ||||
|         "rsr.excvaddr %[vaddr]\n\t"             /* Read faulting address as early as possible */ | ||||
|         "movi.n %[tmp],  ~3\n\t"                /* prepare a mask for the EPC */ | ||||
|         "and    %[tmp],  %[tmp],  %[epc]\n\t"   /* apply mask for 32-bit aligned base */ | ||||
|         "ssa8l  %[epc]\n\t"                     /* set up shift register for src op */ | ||||
|         "l32i   %[insn], %[tmp],  0\n\t"        /* load part 1 */ | ||||
|         "l32i   %[tmp],  %[tmp],  4\n\t"        /* load part 2 */ | ||||
|         "src    %[insn], %[tmp],  %[insn]\n\t"  /* right shift to get faulting instruction */ | ||||
|         : [vaddr]"=&r"(excvaddr), [insn]"=&r"(insn), [tmp]"=&r"(tmp) | ||||
|         : [epc]"r"(ef->epc) :); | ||||
|     } | ||||
|  | ||||
| #else | ||||
|     { | ||||
|       __asm__ __volatile__ ("rsr.excvaddr %0;" : "=r"(excvaddr):: "memory"); | ||||
|       /* | ||||
|         "C" reference code for the ASM to document intent. | ||||
|         May also prove useful when issolating possible issues with Extended ASM, | ||||
|         optimizations, new compilers, etc. | ||||
|       */ | ||||
|       uint32_t epc = ef->epc; | ||||
|       uint32_t *pWord = (uint32_t *)(epc & ~3); | ||||
|       uint64_t big_word = ((uint64_t)pWord[1] << 32) | pWord[0]; | ||||
|       uint32_t pos = (epc & 3) * 8; | ||||
|       insn = (uint32_t)(big_word >>= pos); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     uint32_t what = insn & LOAD_MASK; | ||||
|     uint32_t valmask = 0; | ||||
| @@ -102,10 +135,6 @@ static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef, | ||||
|       --regno;               /* account for skipped a1 in exception_frame */ | ||||
|     } | ||||
|  | ||||
|     uint32_t excvaddr; | ||||
|     /* read out the faulting address */ | ||||
|     __asm("rsr %0, EXCVADDR;" :"=r"(excvaddr)::); | ||||
|  | ||||
| #ifdef DEBUG_ESP_MMU | ||||
|     /* debug option to validate address so we don't hide memory access bugs in APP */ | ||||
|     if (mmu_is_iram((void *)excvaddr) || (is_read && mmu_is_icache((void *)excvaddr))) { | ||||
| @@ -114,31 +143,34 @@ static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef, | ||||
|       continue;  /* fail */ | ||||
|     } | ||||
| #endif | ||||
|     { | ||||
|       uint32_t *pWord = (uint32_t *)(excvaddr & ~0x3); | ||||
|       uint32_t pos = (excvaddr & 0x3) * 8; | ||||
|       uint32_t mem_val = *pWord; | ||||
|  | ||||
|     if (is_read) { | ||||
|       /* Load, shift and mask down to correct size */ | ||||
|       uint32_t val = (*(uint32_t *)(excvaddr & ~0x3)); | ||||
|       val >>= (excvaddr & 0x3) * 8; | ||||
|       val &= valmask; | ||||
|       if (is_read) { | ||||
|         /* shift and mask down to correct size */ | ||||
|         mem_val >>= pos; | ||||
|         mem_val &= valmask; | ||||
|  | ||||
|       /* Sign-extend for L16SI, if applicable */ | ||||
|       if (what == L16SI_MATCH && (val & 0x8000)) { | ||||
|         val |= 0xffff0000; | ||||
|         /* Sign-extend for L16SI, if applicable */ | ||||
|         if (what == L16SI_MATCH && (mem_val & 0x8000)) { | ||||
|           mem_val |= 0xffff0000; | ||||
|         } | ||||
|  | ||||
|         ef->a_reg[regno] = mem_val;  /* carry out the load */ | ||||
|  | ||||
|       } else { /* is write */ | ||||
|         uint32_t val = ef->a_reg[regno];  /* get value to store from register */ | ||||
|         val <<= pos; | ||||
|         valmask <<= pos; | ||||
|         val &= valmask; | ||||
|  | ||||
|         /* mask out field, and merge */ | ||||
|         mem_val &= (~valmask); | ||||
|         mem_val |= val; | ||||
|         *pWord = mem_val; /* carry out the store */ | ||||
|       } | ||||
|  | ||||
|       ef->a_reg[regno] = val;  /* carry out the load */ | ||||
|  | ||||
|     } else { /* is write */ | ||||
|       uint32_t val = ef->a_reg[regno];  /* get value to store from register */ | ||||
|       val <<= (excvaddr & 0x3) * 8; | ||||
|       valmask <<= (excvaddr & 0x3) * 8; | ||||
|       val &= valmask; | ||||
|  | ||||
|       /* Load, mask out field, and merge */ | ||||
|       uint32_t dst_val = (*(uint32_t *)(excvaddr & ~0x3)); | ||||
|       dst_val &= (~valmask); | ||||
|       dst_val |= val; | ||||
|       (*(uint32_t *)(excvaddr & ~0x3)) = dst_val; /* carry out the store */ | ||||
|     } | ||||
|  | ||||
|     ef->epc += 3;            /* resume at following instruction */ | ||||
| @@ -201,6 +233,7 @@ static void _set_exception_handler_wrapper(int cause) { | ||||
|   } | ||||
| } | ||||
|  | ||||
| void install_non32xfer_exception_handler(void) __attribute__((weak)); | ||||
| void install_non32xfer_exception_handler(void) { | ||||
|   if (NULL == old_c_handler) { | ||||
|     // Set the "C" exception handler the wrapper will call | ||||
|   | ||||
| @@ -189,6 +189,9 @@ _xtos_c_wrapper_handler: | ||||
|         // Restore special registers | ||||
|         l32i    a14,  a1, UEXC_sar | ||||
|  | ||||
|         // load early - saves two cycles - @mhightower83 | ||||
|         movi     a0, _xtos_return_from_exc | ||||
|  | ||||
| // For compatibility with Arduino interrupt architecture, we keep interrupts 100% | ||||
| // disabled. | ||||
| //        /* | ||||
| @@ -201,7 +204,6 @@ _xtos_c_wrapper_handler: | ||||
| //        rsil  a12, 1 // XCHAL_EXCM_LEVEL | ||||
|         rsil  a12, 15  // All levels blocked. | ||||
|         wsr   a14, SAR | ||||
|         movi   a0, _xtos_return_from_exc | ||||
|         jx     a0 | ||||
|  | ||||
|         /* FIXME: what about _GeneralException ? */ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user