1
0
mirror of https://github.com/esp8266/Arduino.git synced 2025-04-19 23:22:16 +03:00
Earle F. Philhower, III 2a5d215977
Reduce the IRAM usage of I2C code by 600-1500 bytes (#6326)
* Reduce the IRAM (and heap) usage of I2C code

The I2C code takes a large chunk of IRAM space.  Attempt to reduce the
size of the routines without impacting functionality.

First, remove the `static` classifier on the sda/scl variables in the
event handlers.  The first instructions in the routines overwrite the
last value stored in them, anyway, and their addresses are never taken.

* Make most variables ints, not uint8_ts

Where it doesn't make a functional difference, make global variables
ints and not uint8_t.  Bytewide updates and extracts require multiple
instructions and hence increase IRAM usage as well as runtime.

* Make local flag vars int

Sketch uses 270855 bytes (25%) of program storage space. Maximum is 1044464 bytes.
Global variables use 27940 bytes (34%) of dynamic memory, leaving 53980 bytes for local variables. Maximum is 81920 bytes.
./xtensa-lx106-elf/bin/xtensa-lx106-elf-objdump -t  -j .text1 /tmp/arduino_build_9615/*elf | sort -k1 | head -20
401000cc l     F .text1	00000014 twi_delay
401000ec l     F .text1	00000020 twi_reply$part$1
4010010c g     F .text1	00000035 twi_reply
4010014c g     F .text1	00000052 twi_stop
401001a0 g     F .text1	0000003b twi_releaseBus
40100204 g     F .text1	000001e6 twi_onTwipEvent
40100404 l     F .text1	000001f7 onSdaChange
40100608 l     F .text1	000002fd onSclChange
40100908 l     F .text1	0000003b onTimer

* Factor out !scl in onSdaChange

If SCL is low then all branches of the case are no-ops, so factor that
portion out to remove some redundant logic in each case.

Sketch uses 270843 bytes (25%) of program storage space. Maximum is 1044464 bytes.
Global variables use 27944 bytes (34%) of dynamic memory, leaving 53976 bytes for local variables. Maximum is 81920 bytes.

401000cc l     F .text1	00000014 twi_delay
401000ec l     F .text1	00000020 twi_reply$part$1
4010010c g     F .text1	00000035 twi_reply
4010014c g     F .text1	00000052 twi_stop
401001a0 g     F .text1	0000003b twi_releaseBus
40100204 g     F .text1	000001e6 twi_onTwipEvent
40100404 l     F .text1	000001e7 onSdaChange
401005f8 l     F .text1	000002fd onSclChange
401008f8 l     F .text1	0000003b onTimer

0x0000000040107468                _text_end = ABSOLUTE (.)

* Make tiny twi_reply inline

twi_reply is a chunk of code that can be inlined and actually save IRAM
space because certain conditions can be statically evaluated by gcc.

Sketch uses 270823 bytes (25%) of program storage space. Maximum is 1044464 bytes.
Global variables use 27944 bytes (34%) of dynamic memory, leaving 53976 bytes for local variables. Maximum is 81920 bytes.

401000cc l     F .text1	00000014 twi_delay
401000f4 g     F .text1	00000052 twi_stop
40100148 g     F .text1	0000003b twi_releaseBus
401001b0 g     F .text1	00000206 twi_onTwipEvent
401003d0 l     F .text1	000001e7 onSdaChange
401005c4 l     F .text1	000002fd onSclChange
401008c4 l     F .text1	0000003b onTimer
40100918 g     F .text1	00000085 millis
401009a0 g     F .text1	0000000f micros
401009b0 g     F .text1	00000022 micros64
401009d8 g     F .text1	00000013 delayMicroseconds
401009f0 g     F .text1	00000034 __digitalRead
401009f0  w    F .text1	00000034 digitalRead
40100a3c g     F .text1	000000e4 interrupt_handler
40100b20 g     F .text1	0000000f vPortFree

0x0000000040107434                _text_end = ABSOLUTE (.)

* Inline additional twi_** helper functions

Sketch uses 270799 bytes (25%) of program storage space. Maximum is 1044464 bytes.
Global variables use 27944 bytes (34%) of dynamic memory, leaving 53976 bytes for local variables. Maximum is 81920 bytes.

401000cc l     F .text1	00000014 twi_delay
401000f4  w    F .text1	0000003b twi_releaseBus
4010015c g     F .text1	00000246 twi_onTwipEvent
401003bc l     F .text1	000001e7 onSdaChange
401005b0 l     F .text1	000002f9 onSclChange
401008ac l     F .text1	0000003b onTimer

0x000000004010741c                _text_end = ABSOLUTE (.)

* Convert state machine to 1-hot for faster lookup

GCC won't use a lookup table for the TWI state machine, so it ends up
using a series of straight line compare-jump, compare-jumps to figure
out which branch of code to execute for each state.  For branches that
have multiple states that call them, this can expand to a lot of code.

Short-circuit the whole thing by converting the FSM to a 1-hot encoding
while executing it, and then just and-ing the 1-hot state with the
bitmask of states with the same code.

Sketch uses 270719 bytes (25%) of program storage space. Maximum is 1044464 bytes.
Global variables use 27944 bytes (34%) of dynamic memory, leaving 53976 bytes for local variables. Maximum is 81920 bytes.

401000cc l     F .text1	00000014 twi_delay
401000f4  w    F .text1	0000003b twi_releaseBus
4010015c g     F .text1	00000246 twi_onTwipEvent
401003c0 l     F .text1	000001b1 onSdaChange
40100580 l     F .text1	000002da onSclChange
4010085c l     F .text1	0000003b onTimer

0x00000000401073cc                _text_end = ABSOLUTE (.)

Saves 228 bytes of IRAM vs. master, uses 32 additional bytes of heap.

* Factor out twi_status setting

twi_status is set immediately before  an event handler is called,
resulting in lots of duplicated code.  Set the twi_status flag inside
the handler itself.

Saves an add'l ~100 bytes of IRAM from prior changes, for a total of
~340 bytes.

earle@server:~/Arduino/hardware/esp8266com/esp8266/tools$ ./xtensa-lx106-elf/bin/xtensa-lx106-elf-objdump -t  -j .text1 /tmp/arduino_build_849115/*elf | sort -k1 | head -20

401000cc l     F .text1	00000014 twi_delay
401000f4  w    F .text1	0000003b twi_releaseBus
40100160 g     F .text1	0000024e twi_onTwipEvent
401003c8 l     F .text1	00000181 onSdaChange
40100558 l     F .text1	00000297 onSclChange

* Use a struct to hold globals for TWI

Thanks to the suggestion from @mhightower83, move all global objects
into a struct.  This lets a single base pointer register be used in
place of constantly reloading the address of each individual variable.

This might be better expressed by moving this to a real C++
implementation based on a class object (the twi.xxxx would go back to the
old xxx-only naming for vars), but there would then need to be API
wrappers since the functionality is exposed through a plain C API.

Saves 168 additional code bytes, for a grand total of 550 bytes IRAM.

earle@server:~/Arduino/hardware/esp8266com/esp8266/tools$ ./xtensa-lx106-elf/bin/xtensa-lx106-elf-objdump -t  -j .text1 /tmp/arduino_build_849115/*elf | sort -k1 | head -20

401000cc l     F .text1	00000014 twi_delay
401000e8  w    F .text1	00000032 twi_releaseBus
40100128 g     F .text1	00000217 twi_onTwipEvent
4010034c l     F .text1	00000149 onSdaChange
4010049c l     F .text1	00000267 onSclChange
40100704 l     F .text1	00000028 onTimer

* Use enums for states, move one more var to twi struct

Make the TWI states enums and not #defines, in the hope that it will
allow GCC to more easily flag problems and general good code
organization.

401000cc l     F .text1	00000014 twi_delay
401000e8  w    F .text1	00000032 twi_releaseBus
40100128 g     F .text1	00000217 twi_onTwipEvent
4010034c l     F .text1	00000149 onSdaChange
4010049c l     F .text1	00000257 onSclChange
401006f4 l     F .text1	00000028 onTimer

Looks like another 16 bytes IRAM saved from the prior push.

Sketch uses 267079 bytes (25%) of program storage space. Maximum is 1044464 bytes.
Global variables use 27696 bytes (33%) of dynamic memory, leaving 54224 bytes for local variables. Maximum is 81920 bytes.

* Save 4 heap bytes by reordering struct

* Convert to C++ class, clean up code

Convert the entire file into a C++ class (with C wrappers to preserve
the ABI).  This allows for setting individual values of the global
struct(class) in-situ instead of a cryptic list at the end of the struct
definition.  It also removes a lot of redundant `twi.`s from most class
members.

Clean up the code by converting from `#defines` to inline functions, get
rid of ternarys-as-ifs, use real enums, etc.

For slave_receiver.ino, the numbers are:
GIT Master IRAM: 0x723c
This push IRAM: 0x6fc0

For a savings of 636 total IRAM bytes (note, there may be a slight flash
text increase, but we have 1MB of flash to work with and only 32K of IRAM,
so the tradeoff makes sense).

* Run astyle core.conf, clean up space/tab/etc.

Since the C++ version has significant text differences anyway, now is a
good time to clean up the mess of spaces, tabs, and differing cuddles.

* Add enum use comment, rename twi::delay, fix SDA/SCL_READ bool usage

Per review comments

* Replace clock stretch repeated code w/inline loop

There were multiple places where the code was waiting for a slave to
finish stretching the clock.  Factor them out to an *inline* function
to reduce code smell.

* Remove slave code when not using slave mode

Add a new twi_setSlaveMode call which actually attaches the interrupts
to the slave pin change code onSdaChange/onSclChange.  Don't attach
interrupts in the main twi_begin.

Because slave mode is only useful when an onReceive or onRequest
callback is registered, call twi_setSlaveMode and attach interrupts in
the Wire setters.

This allows GCC to not link in slave code unless slave mode is used,
saving over 1,000 bytes of IRAM in the common, master-only case.
2019-10-14 14:32:41 -07:00

92 lines
2.9 KiB
C++

/*
TwoWire.h - TWI/I2C library for Arduino & Wiring
Copyright (c) 2006 Nicholas Zambetti. All right reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Modified 2012 by Todd Krein (todd@krein.org) to implement repeated starts
Modified December 2014 by Ivan Grokhotkov (ivan@esp8266.com) - esp8266 support
Modified April 2015 by Hrsto Gochkov (ficeto@ficeto.com) - alternative esp8266 support
*/
#ifndef TwoWire_h
#define TwoWire_h
#include <inttypes.h>
#include "Stream.h"
// Buffer length constant for this library.
// NOTE(review): presumably the capacity of rxBuffer/txBuffer, which are declared
// without a size in the class below — confirm against the implementation file.
#define BUFFER_LENGTH 128
// TWI/I2C front end. Derives from Stream, so it provides the
// write/available/read/peek/flush interface declared below.
class TwoWire : public Stream
{
private:
    // All data members are static, i.e. shared by every TwoWire object.

    // rx* members
    static uint8_t rxBuffer[];
    static uint8_t rxBufferIndex;
    static uint8_t rxBufferLength;

    // tx* members
    static uint8_t txAddress;
    static uint8_t txBuffer[];
    static uint8_t txBufferIndex;
    static uint8_t txBufferLength;
    static uint8_t transmitting;

    // Callbacks stored by onRequest()/onReceive(), plus the matching
    // internal service routines.
    static void (*user_onRequest)();
    static void (*user_onReceive)(size_t);
    static void onRequestService();
    static void onReceiveService(uint8_t* data, size_t size);

public:
    TwoWire();

    // Start-up overloads, with and without explicit pins / address.
    void begin(int sda, int scl);
    void begin(int sda, int scl, uint8_t address);
    // Deprecated: new code should call begin(sda, scl) instead.
    void pins(int sda, int scl) __attribute__((deprecated));
    void begin();
    void begin(uint8_t address);
    void begin(int address);

    void setClock(uint32_t frequency);
    void setClockStretchLimit(uint32_t limit);

    void beginTransmission(uint8_t address);
    void beginTransmission(int address);
    uint8_t endTransmission();
    uint8_t endTransmission(uint8_t sendStop);

    size_t requestFrom(uint8_t address, size_t size, bool sendStop);
    uint8_t status();
    uint8_t requestFrom(uint8_t address, uint8_t quantity);
    uint8_t requestFrom(uint8_t address, uint8_t quantity, uint8_t sendStop);
    uint8_t requestFrom(int address, int quantity);
    uint8_t requestFrom(int address, int quantity, int sendStop);

    // Virtual byte-stream interface.
    virtual size_t write(uint8_t data);
    virtual size_t write(const uint8_t* data, size_t quantity);
    virtual int available();
    virtual int read();
    virtual int peek();
    virtual void flush();

    // Callback registration.
    void onReceive(void (*handler)(int));     // Arduino API signature
    void onReceive(void (*handler)(size_t));  // legacy ESP8266 signature, kept for backward compatibility
    void onRequest(void (*handler)());

    // Keep the remaining Print::write overloads visible alongside the two above.
    using Print::write;
};
// Declare the global `Wire` object (defined outside this header) unless the
// build opts out by defining NO_GLOBAL_INSTANCES or NO_GLOBAL_TWOWIRE.
#if !defined(NO_GLOBAL_INSTANCES) && !defined(NO_GLOBAL_TWOWIRE)
extern TwoWire Wire;
#endif
#endif