1
0
mirror of https://github.com/esp8266/Arduino.git synced 2025-04-19 23:22:16 +03:00

CONT stack overflow postmortem (#9083)

- check in cont_run() and cont_suspend() whether a1 is out of bounds
- in case a1 is broken, postmortem will still report proper context in
  proper stack boundaries

additionally
- as suggested in #9069, change stack smashing to a single line that
  does not mention any Exceptions
- reduce overall stack dump length when there are know garbage values i.e. cont stackguard
- decoder.py addr search regexp would no longer skip stack lines with '<'
- fix decoder.py parsing so it notices both stack smashing and alloc errors
This commit is contained in:
Max Prokhorov 2024-03-26 10:58:20 +03:00 committed by GitHub
parent 41ecd65c6a
commit eda4e0855f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 112 additions and 48 deletions

View File

@ -26,8 +26,14 @@
cont_suspend:
/* a1: sp */
/* a2: void* cont_ctx */
/* adjust stack and save registers */
/* adjust stack */
addi a1, a1, -24
/* make sure that a1 points after cont_ctx.stack[] */
addi a4, a2, 32
bltu a1, a4, cont_overflow
/* save registers */
s32i a12, a1, 0
s32i a13, a1, 4
s32i a14, a1, 8
@ -47,6 +53,11 @@ cont_suspend:
l32i a1, a2, 4
jx a0
cont_overflow:
mov.n a3, a1
movi a4, __stack_overflow
jx a4
cont_continue:
l32i a12, a1, 0
l32i a13, a1, 4
@ -113,7 +124,7 @@ cont_run:
bnez a4, cont_resume
/* else */
/* set new stack*/
l32i a1, a2, 16;
l32i a1, a2, 16
/* goto pfn */
movi a2, cont_wrapper
jx a2
@ -121,6 +132,9 @@ cont_run:
cont_resume:
/* a1 <- cont_ctx.sp_suspend */
l32i a1, a2, 12
/* make sure that a1 points after cont_ctx.stack[] */
addi a5, a2, 32
bltu a1, a5, cont_overflow
/* reset yield flag, 0 -> cont_ctx.pc_suspend */
movi a3, 0
s32i a3, a2, 8

View File

@ -22,11 +22,16 @@
#define CONT_H_
#include <stdbool.h>
#include <stdint.h>
#ifndef CONT_STACKSIZE
#define CONT_STACKSIZE 4096
#endif
#ifndef CONT_STACKGUARD
#define CONT_STACKGUARD 0xfeefeffe
#endif
#ifdef __cplusplus
extern "C" {
#endif
@ -62,8 +67,11 @@ void cont_run(cont_t*, void (*pfn)(void));
// execution state (registers and stack)
void cont_suspend(cont_t*);
// Check that cont resume state is valid. Immediately panics on failure.
void cont_check_overflow(cont_t*);
// Check guard bytes around the stack. Immediately panics on failure.
void cont_check(cont_t*);
void cont_check_guard(cont_t*);
// Go through stack and check how many bytes are most probably still unchanged
// and thus weren't used by the user code. i.e. that stack space is free. (high water mark)
@ -78,7 +86,6 @@ bool cont_can_suspend(cont_t* cont);
// free, running the routine, then checking the max free
void cont_repaint_stack(cont_t *cont);
#ifdef __cplusplus
}
#endif

View File

@ -23,39 +23,46 @@
#include <stddef.h>
#include <string.h>
#include "cont.h"
#include "core_esp8266_features.h"
#include "debug.h"
#include "cont.h"
extern "C"
{
static constexpr unsigned int CONT_STACKGUARD { 0xfeefeffe };
static constexpr uint32_t CONT_STACKSIZE_U32 { sizeof(cont_t::stack) / sizeof(*cont_t::stack) };
void cont_init(cont_t* cont) {
memset(cont, 0, sizeof(cont_t));
cont->stack_guard1 = CONT_STACKGUARD;
cont->stack_guard2 = CONT_STACKGUARD;
cont->stack_end = cont->stack + (sizeof(cont->stack) / 4);
cont->stack_end = &cont->stack[0] + CONT_STACKSIZE_U32;
cont->struct_start = (unsigned*) cont;
// fill stack with magic values to check high water mark
for(int pos = 0; pos < (int)(sizeof(cont->stack) / 4); pos++)
for(int pos = 0; pos < (int)(CONT_STACKSIZE_U32); pos++)
{
cont->stack[pos] = CONT_STACKGUARD;
}
}
void IRAM_ATTR cont_check(cont_t* cont) {
if ((cont->stack_guard1 == CONT_STACKGUARD)
&& (cont->stack_guard2 == CONT_STACKGUARD))
void IRAM_ATTR cont_check_guard(cont_t* cont) {
if ((cont->stack_guard1 != CONT_STACKGUARD)
|| (cont->stack_guard2 != CONT_STACKGUARD))
{
return;
}
__stack_chk_fail();
__builtin_unreachable();
}
}
void IRAM_ATTR cont_check_overflow(cont_t* cont) {
if (cont->sp_suspend && (cont->sp_suspend < &cont->stack[0])) {
__stack_overflow(cont, cont->sp_suspend);
__builtin_unreachable();
}
}
// No need for this to be in IRAM, not expected to be IRQ called
int cont_get_free_stack(cont_t* cont) {

View File

@ -261,7 +261,7 @@ static void loop_wrapper() {
}
loop();
loop_end();
cont_check(g_pcont);
cont_check_guard(g_pcont);
if (serialEventRun) {
serialEventRun();
}

View File

@ -48,7 +48,7 @@ static const char* s_unhandled_exception = NULL;
// Common way to notify about where the stack smashing happened
// (but, **only** if caller uses our handler function)
static uint32_t s_stacksmash_addr = 0;
static uint32_t s_stack_chk_addr = 0;
void abort() __attribute__((noreturn));
static void uart_write_char_d(char c);
@ -59,6 +59,7 @@ static void print_stack(uint32_t start, uint32_t end);
// using numbers different from "REASON_" in user_interface.h (=0..6)
enum rst_reason_sw
{
REASON_USER_STACK_OVERFLOW = 252,
REASON_USER_STACK_SMASH = 253,
REASON_USER_SWEXCEPTION_RST = 254
};
@ -188,7 +189,7 @@ static void postmortem_report(uint32_t sp_dump) {
}
else if (rst_info.reason == REASON_SOFT_WDT_RST) {
ets_printf_P(PSTR("\nSoft WDT reset"));
const char infinite_loop[] = { 0x06, 0xff, 0xff }; // loop: j loop
const uint8_t infinite_loop[] = { 0x06, 0xff, 0xff }; // loop: j loop
if (is_pc_valid(rst_info.epc1) && 0 == memcmp_P(infinite_loop, (PGM_VOID_P)rst_info.epc1, 3u)) {
// The SDK is riddled with these. They are usually preceded by an ets_printf.
ets_printf_P(PSTR(" - deliberate infinite loop detected"));
@ -198,17 +199,23 @@ static void postmortem_report(uint32_t sp_dump) {
rst_info.exccause, /* Address executing at time of Soft WDT level-1 interrupt */ rst_info.epc1, 0, 0, 0, 0);
}
else if (rst_info.reason == REASON_USER_STACK_SMASH) {
ets_printf_P(PSTR("\nStack smashing detected.\n"));
ets_printf_P(PSTR("\nException (%d):\nepc1=0x%08x epc2=0x%08x epc3=0x%08x excvaddr=0x%08x depc=0x%08x\n"),
5 /* Alloca exception, closest thing to stack fault*/, s_stacksmash_addr, 0, 0, 0, 0);
ets_printf_P(PSTR("\nStack smashing detected at 0x%08x\n"), s_stack_chk_addr);
}
else if (rst_info.reason == REASON_USER_STACK_OVERFLOW) {
ets_printf_P(PSTR("\nStack overflow detected\n"));
}
else {
ets_printf_P(PSTR("\nGeneric Reset\n"));
}
uint32_t cont_stack_start = (uint32_t) &(g_pcont->stack);
uint32_t cont_stack_end = (uint32_t) g_pcont->stack_end;
uint32_t stack_end;
uint32_t cont_stack_start;
if (rst_info.reason == REASON_USER_STACK_SMASH) {
cont_stack_start = s_stack_chk_addr;
} else {
cont_stack_start = (uint32_t) (&g_pcont->stack[0]);
}
uint32_t cont_stack_end = cont_stack_start + CONT_STACKSIZE;
// amount of stack taken by interrupt or exception handler
// and everything up to __wrap_system_restart_local
@ -249,15 +256,21 @@ static void postmortem_report(uint32_t sp_dump) {
sp_dump = stack_thunk_get_cont_sp();
}
if (sp_dump > cont_stack_start && sp_dump < cont_stack_end) {
uint32_t stack_end;
// above and inside of cont, dump from the sp to the bottom of the stack
if ((rst_info.reason == REASON_USER_STACK_OVERFLOW)
|| ((sp_dump > cont_stack_start) && (sp_dump < cont_stack_end)))
{
ets_printf_P(PSTR("\nctx: cont\n"));
stack_end = cont_stack_end;
}
// in system, reposition to a known address
// it's actually 0x3ffffff0, but the stuff below ets_run
// is likely not really relevant to the crash
else {
ets_printf_P(PSTR("\nctx: sys\n"));
stack_end = 0x3fffffb0;
// it's actually 0x3ffffff0, but the stuff below ets_run
// is likely not really relevant to the crash
}
ets_printf_P(PSTR("sp: %08x end: %08x offset: %04x\n"), sp_dump, stack_end, offset);
@ -296,8 +309,17 @@ static void print_stack(uint32_t start, uint32_t end) {
for (uint32_t pos = start; pos < end; pos += 0x10) {
uint32_t* values = (uint32_t*)(pos);
// avoid printing irrelevant data
if ((values[0] == CONT_STACKGUARD)
&& (values[0] == values[1])
&& (values[1] == values[2])
&& (values[2] == values[3]))
{
continue;
}
// rough indicator: stack frames usually have SP saved as the second word
bool looksLikeStackFrame = (values[2] == pos + 0x10);
const bool looksLikeStackFrame = (values[2] == pos + 0x10);
ets_printf_P(PSTR("%08x: %08x %08x %08x %08x %c\n"),
pos, values[0], values[1], values[2], values[3], (looksLikeStackFrame) ? '<' : ' ');
@ -370,7 +392,7 @@ void __panic_func(const char* file, int line, const char* func) {
uintptr_t __stack_chk_guard = 0x08675309 ^ RANDOM_REG32;
void __stack_chk_fail(void) {
s_user_reset_reason = REASON_USER_STACK_SMASH;
s_stacksmash_addr = (uint32_t)__builtin_return_address(0);
s_stack_chk_addr = (uint32_t)__builtin_return_address(0);
if (gdb_present())
__asm__ __volatile__ ("syscall"); // triggers GDB when enabled
@ -382,4 +404,16 @@ void __stack_chk_fail(void) {
__builtin_unreachable(); // never reached, needed to satisfy "noreturn" attribute
}
void __stack_overflow(cont_t* cont, uint32_t* sp) {
s_user_reset_reason = REASON_USER_STACK_OVERFLOW;
s_stack_chk_addr = (uint32_t)&cont->stack[0];
if (gdb_present())
__asm__ __volatile__ ("syscall"); // triggers GDB when enabled
postmortem_report((uint32_t)sp);
__builtin_unreachable(); // never reached, needed to satisfy "noreturn" attribute
}
} // extern "C"

View File

@ -4,6 +4,8 @@
#include <stddef.h>
#include <stdint.h>
#include "cont.h"
#define _DEBUG_LEAF_FUNCTION(...) __asm__ __volatile__("" ::: "a0", "memory")
#ifdef DEBUG_ESP_CORE
@ -32,6 +34,7 @@ extern "C"
{
#endif
void __stack_chk_fail(void) __attribute__((noreturn));
void __stack_overflow(cont_t*, uint32_t*) __attribute__((noreturn));
void __unhandled_exception(const char* str) __attribute__((noreturn));
void __panic_func(const char* file, int line, const char* func) __attribute__((noreturn));
#define panic() __panic_func(PSTR(__FILE__), __LINE__, __func__)

View File

@ -52,6 +52,7 @@ EXCEPTION_CODES = (
"permit stores",
)
# similar to java version, which used `list` and re-formatted it
# instead, simply use an already short-format `info line`
# TODO `info symbol`? revert to `list`?
@ -96,12 +97,12 @@ def addresses_addr2line(addr2line, elf, addresses):
def decode_lines(format_addresses, elf, lines):
STACK_RE = re.compile(r"^[0-9a-f]{8}:\s+([0-9a-f]{8} ?)+ *$")
ANY_ADDR_RE = re.compile(r"0x[0-9a-fA-F]{8}|[0-9a-fA-F]{8}")
HEX_ADDR_RE = re.compile(r"0x[0-9a-f]{8}")
LAST_ALLOC_RE = re.compile(
r"last failed alloc call: ([0-9a-fA-F]{8})\(([0-9]+)\).*"
)
LAST_ALLOC = "last failed alloc"
MEM_ERR_LINE_RE = re.compile(r"^(Stack|last failed alloc call)")
STACK_LINE_RE = re.compile(r"^[0-9a-f]{8}:\s\s+")
CUT_HERE_STRING = "CUT HERE FOR EXCEPTION DECODER"
EXCEPTION_STRING = "Exception ("
@ -131,13 +132,11 @@ def decode_lines(format_addresses, elf, lines):
stack_addresses = print_all_addresses(stack_addresses)
last_stack = line.strip()
# 3fffffb0: feefeffe feefeffe 3ffe85d8 401004ed
elif in_stack and STACK_RE.match(line):
stack, addrs = line.split(":")
addrs = addrs.strip()
addrs = addrs.split(" ")
elif in_stack and STACK_LINE_RE.match(line):
_, addrs = line.split(":")
addrs = ANY_ADDR_RE.findall(addrs)
stack_addresses.setdefault(last_stack, [])
for addr in addrs:
stack_addresses[last_stack].append(addr)
stack_addresses[last_stack].extend(addrs)
# epc1=0xfffefefe epc2=0xfefefefe epc3=0xefefefef excvaddr=0xfefefefe depc=0xfefefefe
elif EPC_STRING in line:
pairs = line.split()
@ -152,13 +151,13 @@ def decode_lines(format_addresses, elf, lines):
elif EXCEPTION_STRING in line:
number = line.strip()[len(EXCEPTION_STRING) : -2]
print(f"Exception ({number}) - {EXCEPTION_CODES[int(number)]}")
# stack smashing detected at <ADDR>
# last failed alloc call: <ADDR>(<NUMBER>)[@<maybe file loc>]
elif LAST_ALLOC in line:
values = LAST_ALLOC_RE.match(line)
if values:
addr, size = values.groups()
elif MEM_ERR_LINE_RE.match(line):
for addr in ANY_ADDR_RE.findall(line):
line = line.replace(addr, format_address(addr))
print()
print(f"Allocation of {size} bytes failed: {format_address(addr)}")
print(line.strip())
# postmortem guards our actual stack dump values with these
elif ">>>stack>>>" in line:
in_stack = True