1
0
mirror of https://github.com/esp8266/Arduino.git synced 2025-10-24 07:13:45 +03:00

Enable 128K virtual memory via external SPI SRAM (#6994)

Provides a transparently accessible additional block of RAM of 128K to
8MB by using an external SPI SRAM.  This memory is managed using the UMM
memory manager and can be used by the core as if it were internal RAM
(albeit much slower to read or write).

The use case would be for things which are quite large but not
particularly frequently used or compute intensive.  For example, the SSL
buffers of 16K++ are a good fit for this, as are the contents of Strings
(both to avoid main heap fragmentation as well as allowing Strings of
>30KB).

A fully associative LRU cache is used to limit the SPI bus bottleneck,
and background writeback is supported.

Uses a define in boards.txt to enable.  If this value is not defined,
then none of the VM routines should be linked in to user apps,
so there should be no space penalty w/o it.

UMM `malloc` and `new` are modified to support internal and external
heap regions.  By default, everything comes from the standard heap, but
a call to `ESP.setExternalHeap()` before the allocation (followed by a
call to `ESP.resetHeap()`) will make the allocation come from external
RAM.  See the `virtualmem.ino` example for use.

If there is no external RAM installed, the `setExternalHeap` call is a
no-op.

The String and BearSSL libraries have been modified to use this external
RAM automatically.

Theory of Operation:

The Xtensa core generates a hardware exception (unrelated to C++
exceptions) when a load or store targets an address that's defined as
invalid.
The XTOS ROM routines capture the machine state and call a standard C
exception handler routine (or the default one which resets the system).

We hook into this exception callback and decode the EXCVADDR (the
address being accessed) and use the exception PC to read out the
faulting instruction. We decode that instruction and simulate its
behavior (i.e. either loading or storing some data to a
register/external memory) and then return to the calling application.

We use the hardware SPI interface to talk to an external SRAM/PSRAM,
and implement a simple cache to minimize the number of times we need
to go out over the (slow) SPI bus. The SPI is set up in a DIO mode
which uses no more pins than normal SPI, but provides for ~2X faster
transfers.  SIO mode is also supported.

NOTE: This works fine for processor accesses, but cannot be used by
any of the peripherals' DMA. For that, we'd need a real MMU.

Hardware Configuration (only use 3.3V compatible SRAMs!):

  SPI byte-addressable SRAM/PSRAM: 23LC1024 or smaller
    CS   -> GPIO15
    SCK  -> GPIO14
    MOSI -> GPIO13
    MISO -> GPIO12
 (note these are GPIO numbers, not the Arduino Dxx pin names.  Refer
  to your ESP8266 board schematic for the mapping of GPIO to pin.)

Higher density PSRAM (ESP-PSRAM64H/etc.) should work as well, but
I'm still waiting on my chips so haven't done any testing.  Biggest
concern is their command set and functionality in DIO mode.  If DIO
mode isn't supported, then a fallback to SIO is possible.

This PR originated with code from @pvvx's esp8266web server at
https://github.com/pvvx/esp8266web (licensed in the public domain)
but doesn't resemble it much any more.  Thanks, @pvvx!

Keep a list of the last 8 lines in RAM (~.5KB of RAM) and use that to
speed up things like memcpys and other operations where the source and
destination addresses are inside VM RAM.

A custom set of SPI routines is used in the VM system for speed and code
size (and because the core cannot be dependent on a library).

Because UMM manages RAM in 8 byte chunks, attempting to manage the
entire 1M available space on a 1M PSRAM causes the block IDs to
overflow, crashing things at some point.  Limit the UMM allocation to
only 256K in this case.  The remaining space can manually be assigned to
buffers/etc. managed by the application, not malloc()/free().
This commit is contained in:
Earle F. Philhower, III
2021-03-14 18:44:02 -07:00
committed by GitHub
parent c720c0d9e8
commit 8ffe41b7df
12 changed files with 760 additions and 77 deletions

View File

@@ -0,0 +1,138 @@
// Times 1024 consecutive 32-bit loads starting at x.
//   x   - first word to read; 1024 words are read in ascending order
//   res - receives the sum of all words read, so the loads have an observable result
// Returns the difference between the ESP.getCycleCount() readings taken
// before the first load and after the sum has been stored to *res.
uint32_t cyclesToRead1Kx32(unsigned int *x, uint32_t *res) {
  const uint32_t startCycles = ESP.getCycleCount();
  uint32_t total = 0;
  const unsigned int *const end = x + 1024;
  while (x != end) {
    total += *x;
    ++x;
  }
  *res = total;
  return ESP.getCycleCount() - startCycles;
}
// Times 1024 consecutive 32-bit stores starting at x.
//   x - first word to write; element n receives the running total 0 + 1 + ... + n
// Returns the difference between the ESP.getCycleCount() readings taken
// before the first store and after the last one.
uint32_t cyclesToWrite1Kx32(unsigned int *x) {
  const uint32_t startCycles = ESP.getCycleCount();
  uint32_t running = 0;
  for (int n = 0; n < 1024; ++n) {
    running += n;
    x[n] = running;
  }
  return ESP.getCycleCount() - startCycles;
}
// Times 1024 consecutive 16-bit loads starting at x.
//   x   - first halfword to read; 1024 halfwords are read in ascending order
//   res - receives the 32-bit sum of all halfwords read
// Returns the difference between the ESP.getCycleCount() readings taken
// before the first load and after the sum has been stored to *res.
uint32_t cyclesToRead1Kx16(unsigned short *x, uint32_t *res) {
  const uint32_t startCycles = ESP.getCycleCount();
  uint32_t total = 0;
  const unsigned short *const end = x + 1024;
  while (x != end) {
    total += *x;
    ++x;
  }
  *res = total;
  return ESP.getCycleCount() - startCycles;
}
// Times 1024 consecutive 16-bit stores starting at x.
//   x - first halfword to write; element n receives the running total
//       0 + 1 + ... + n, narrowed to 16 bits by the store
// Returns the difference between the ESP.getCycleCount() readings taken
// before the first store and after the last one.
uint32_t cyclesToWrite1Kx16(unsigned short *x) {
  const uint32_t startCycles = ESP.getCycleCount();
  uint32_t running = 0;
  for (int n = 0; n < 1024; ++n) {
    running += n;
    x[n] = running;
  }
  return ESP.getCycleCount() - startCycles;
}
// Times 1024 consecutive 8-bit loads starting at x.
//   x   - first byte to read; 1024 bytes are read in ascending order
//   res - receives the 32-bit sum of all bytes read
// Returns the difference between the ESP.getCycleCount() readings taken
// before the first load and after the sum has been stored to *res.
uint32_t cyclesToRead1Kx8(unsigned char *x, uint32_t *res) {
  const uint32_t startCycles = ESP.getCycleCount();
  uint32_t total = 0;
  const unsigned char *const end = x + 1024;
  while (x != end) {
    total += *x;
    ++x;
  }
  *res = total;
  return ESP.getCycleCount() - startCycles;
}
// Times 1024 consecutive 8-bit stores starting at x.
//   x - first byte to write; element n receives the running total
//       0 + 1 + ... + n, narrowed to 8 bits by the store
// Returns the difference between the ESP.getCycleCount() readings taken
// before the first store and after the last one.
uint32_t cyclesToWrite1Kx8(unsigned char *x) {
  const uint32_t startCycles = ESP.getCycleCount();
  uint32_t running = 0;
  for (int n = 0; n < 1024; ++n) {
    running += n;
    x[n] = running;
  }
  return ESP.getCycleCount() - startCycles;
}
// Demo entry point.  Allocates one 1024-word buffer with the default heap selected and
// one after ESP.setExternalHeap(), prints the cycle counts for 32-, 16- and 8-bit writes
// and reads on each buffer, then builds a String while the external heap is selected and
// prints the free space reported for each heap before and after the buffers are freed.
void setup() {
  Serial.begin(115200);
  Serial.printf("\n");

  // Enabling VM does not change malloc to use the external region.  It will continue to
  // use the normal RAM until we request otherwise.
  uint32_t *mem = static_cast<uint32_t *>(malloc(1024 * sizeof(uint32_t)));
  Serial.printf("Internal buffer: Address %p, free %d\n", mem, ESP.getFreeHeap());

  // Now request from the VM heap
  ESP.setExternalHeap();
  uint32_t *vm = static_cast<uint32_t *>(malloc(1024 * sizeof(uint32_t)));
  Serial.printf("External buffer: Address %p, free %d\n", vm, ESP.getFreeHeap());
  // Make sure we go back to the internal heap for other allocations.  Don't forget to ESP.resetHeap()!
  ESP.resetHeap();

  // The benchmarks below read and write 1024 elements through both pointers, so a failed
  // allocation has to stop the demo here rather than dereference a null pointer.
  if (mem == nullptr || vm == nullptr) {
    Serial.printf("Buffer allocation failed, skipping the memory benchmarks\n");
    free(vm);   // free() accepts a null pointer, so no per-pointer check is needed
    free(mem);
    return;
  }

  uint32_t res;
  uint32_t t;

  // 32-bit accesses
  t = cyclesToWrite1Kx32(vm);
  Serial.printf("Virtual Memory Write: %d cycles for 4K\n", t);
  t = cyclesToWrite1Kx32(mem);
  Serial.printf("Physical Memory Write: %d cycles for 4K\n", t);

  t = cyclesToRead1Kx32(vm, &res);
  Serial.printf("Virtual Memory Read: %d cycles for 4K (sum %08x)\n", t, res);
  t = cyclesToRead1Kx32(mem, &res);
  Serial.printf("Physical Memory Read: %d cycles for 4K (sum %08x)\n", t, res);

  // 16-bit accesses over the first 2K of the same buffers
  t = cyclesToWrite1Kx16(reinterpret_cast<uint16_t *>(vm));
  Serial.printf("Virtual Memory Write: %d cycles for 2K by 16\n", t);
  t = cyclesToWrite1Kx16(reinterpret_cast<uint16_t *>(mem));
  Serial.printf("Physical Memory Write: %d cycles for 2K by 16\n", t);

  t = cyclesToRead1Kx16(reinterpret_cast<uint16_t *>(vm), &res);
  Serial.printf("Virtual Memory Read: %d cycles for 2K by 16 (sum %08x)\n", t, res);
  t = cyclesToRead1Kx16(reinterpret_cast<uint16_t *>(mem), &res);
  Serial.printf("Physical Memory Read: %d cycles for 2K by 16 (sum %08x)\n", t, res);

  // 8-bit accesses over the first 1K of the same buffers
  t = cyclesToWrite1Kx8(reinterpret_cast<uint8_t *>(vm));
  Serial.printf("Virtual Memory Write: %d cycles for 1K by 8\n", t);
  t = cyclesToWrite1Kx8(reinterpret_cast<uint8_t *>(mem));
  Serial.printf("Physical Memory Write: %d cycles for 1K by 8\n", t);

  t = cyclesToRead1Kx8(reinterpret_cast<uint8_t *>(vm), &res);
  Serial.printf("Virtual Memory Read: %d cycles for 1K by 8 (sum %08x)\n", t, res);
  t = cyclesToRead1Kx8(reinterpret_cast<uint8_t *>(mem), &res);
  Serial.printf("Physical Memory Read: %d cycles for 1K by 8 (sum %08x)\n", t, res);

  // Let's use external heap to make a big ole' String
  ESP.setExternalHeap();
  String s = "";
  for (int i = 0; i < 100; i++) {
    s += i;
    s += ' ';
  }
  ESP.resetHeap();

  // getFreeHeap() is queried once with each heap selected
  Serial.printf("Internal free: %d\n", ESP.getFreeHeap());
  ESP.setExternalHeap();
  Serial.printf("External free: %d\n", ESP.getFreeHeap());
  ESP.resetHeap();
  Serial.printf("String: %s\n", s.c_str());

  // Note that free/realloc will all use the heap specified when the pointer was created.
  // No need to change heaps to delete an object, only to create it.
  free(vm);
  free(mem);
  Serial.printf("Internal free: %d\n", ESP.getFreeHeap());
  ESP.setExternalHeap();
  Serial.printf("External free: %d\n", ESP.getFreeHeap());
  ESP.resetHeap();
}
// Intentionally does nothing: the demo runs once from setup().
void loop() {
  return;
}