diff --git a/cores/esp8266/core_esp8266_waveform.c b/cores/esp8266/core_esp8266_waveform.c index 8df602020..e443102db 100644 --- a/cores/esp8266/core_esp8266_waveform.c +++ b/cores/esp8266/core_esp8266_waveform.c @@ -38,84 +38,37 @@ */ #include +#include "ets_sys.h" #include "core_esp8266_waveform.h" -// Need speed, not size, here -#pragma GCC optimize ("O2") - // Maximum delay between IRQs #define MAXIRQUS (10000) -// If the cycles from now to an event are below this value, perform it anyway since IRQs take longer than this -#define CYCLES_FLUFF (100) - -// Macro to get count of predefined array elements -#define countof(a) ((size_t)(sizeof(a)/sizeof(a[0]))) - -// Set/clear *any* GPIO -#define SetGPIOPin(a) do { if (a < 16) { GPOS |= (1<high to keep smooth waveform - unsigned enabled : 1; // Is this GPIO generating a waveform? - unsigned nextTimeLowCycles : 31; // Copy over high->low to keep smooth waveform + uint32_t nextServiceCycle; // ESP cycle timer when a transition required + uint32_t expiryCycle; // For time-limited waveform, the cycle when this waveform must stop + uint32_t nextTimeHighCycles; // Copy over low->high to keep smooth waveform + uint32_t nextTimeLowCycles; // Copy over high->low to keep smooth waveform } Waveform; -// These can be accessed in interrupts, so ensure to bracket access with SEI/CLI -static Waveform waveform[] = { - {0, 0, 1<<0, 0, 0, 0, 0, 0}, // GPIO0 - {0, 0, 1<<1, 0, 0, 0, 0, 0}, // GPIO1 - {0, 0, 1<<2, 0, 0, 0, 0, 0}, - {0, 0, 1<<3, 0, 0, 0, 0, 0}, - {0, 0, 1<<4, 0, 0, 0, 0, 0}, - {0, 0, 1<<5, 0, 0, 0, 0, 0}, - // GPIOS 6-8 not allowed, used for flash - // GPIO 9 and 10 only allowed in 2-bit flash mode -#if !isFlashInterfacePin(9) - {0, 0, 1<<9, 0, 0, 0, 0, 0}, - {0, 0, 1<<10, 0, 0, 0, 0, 0}, -#endif - // GPIO 11 not allowed, used for flash - {0, 0, 1<<12, 0, 0, 0, 0, 0}, - {0, 0, 1<<13, 0, 0, 0, 0, 0}, - {0, 0, 1<<14, 0, 0, 0, 0, 0}, - {0, 0, 1<<15, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0} // GPIO16 -}; +static Waveform waveform[17]; // State of all possible pins +static volatile uint32_t waveformState = 0; // Is the pin high or low, updated in NMI so no access outside the NMI code +static volatile uint32_t waveformEnabled = 0; // Is it actively running, updated in NMI so no access outside the NMI code -static uint32_t (*timer1CB)() = NULL;; +// Enable lock-free by only allowing updates to waveformState and waveformEnabled from IRQ service routine +static volatile uint32_t waveformToEnable = 0; // Message to the NMI handler to start a waveform on a inactive pin +static volatile uint32_t waveformToDisable = 0; // Message to the NMI handler to disable a pin from waveform generation + +static uint32_t (*timer1CB)() = NULL; -// Helper functions -static inline ICACHE_RAM_ATTR uint32_t MicrosecondsToCycles(uint32_t microseconds) { - return clockCyclesPerMicrosecond() * microseconds; -} - -static inline ICACHE_RAM_ATTR uint32_t min_u32(uint32_t a, uint32_t b) { - if (a < b) { - return a; - } - return b; -} - -static inline ICACHE_RAM_ATTR void ReloadTimer(uint32_t a) { - // Below a threshold you actually miss the edge IRQ, so ensure enough time - if (a > 32) { - timer1_write(a); - } else { - timer1_write(32); - } -} +// Non-speed critical bits +#pragma GCC optimize ("Os") static inline ICACHE_RAM_ATTR uint32_t GetCycleCount() { uint32_t ccount; @@ -125,20 +78,18 @@ static inline ICACHE_RAM_ATTR uint32_t GetCycleCount() { // Interrupt on/off control static ICACHE_RAM_ATTR void timer1Interrupt(); -static uint8_t timerRunning = false; -static uint32_t lastCycleCount = 0; // Last ESP cycle counter on running the interrupt routine +static bool timerRunning = false; static void initTimer() { timer1_disable(); - timer1_isr_init(); - timer1_attachInterrupt(timer1Interrupt); - lastCycleCount = GetCycleCount(); + ETS_FRC_TIMER1_INTR_ATTACH(NULL, NULL); + ETS_FRC_TIMER1_NMI_INTR_ATTACH(timer1Interrupt); timer1_enable(TIM_DIV1, TIM_EDGE, TIM_SINGLE); timerRunning = true; } static void ICACHE_RAM_ATTR deinitTimer() { - timer1_attachInterrupt(NULL); + ETS_FRC_TIMER1_NMI_INTR_ATTACH(NULL); timer1_disable(); timer1_isr_init(); timerRunning = false; @@ -149,171 +100,206 @@ void setTimer1Callback(uint32_t (*fn)()) { timer1CB = fn; if (!timerRunning && fn) { initTimer(); - } else if (timerRunning && !fn) { - int cnt = 0; - for (size_t i = 0; i < countof(waveform); i++) { - cnt += waveform[i].enabled ? 1 : 0; - } - if (!cnt) { - deinitTimer(); - } + timer1_write(microsecondsToClockCycles(1)); // Cause an interrupt post-haste + } else if (timerRunning && !fn && !waveformEnabled) { + deinitTimer(); } - ReloadTimer(MicrosecondsToCycles(1)); // Cause an interrupt post-haste } // Start up a waveform on a pin, or change the current one. Will change to the new // waveform smoothly on next low->high transition. For immediate change, stopWaveform() // first, then it will immediately begin. int startWaveform(uint8_t pin, uint32_t timeHighUS, uint32_t timeLowUS, uint32_t runTimeUS) { - Waveform *wave = NULL; - for (size_t i = 0; i < countof(waveform); i++) { - if (((pin == 16) && waveform[i].gpio16Mask==1) || ((pin != 16) && (waveform[i].gpioMask == 1< 16) || isFlashInterfacePin(pin)) { return false; } - - // To safely update the packed bitfields we need to stop interrupts while setting them as we could - // get an IRQ in the middle of a multi-instruction mask-and-set required to change them which would - // then cause an IRQ update of these values (.enabled only, for now) to be lost. - ets_intr_lock(); - - wave->nextTimeHighCycles = MicrosecondsToCycles(timeHighUS) - 70; // Take out some time for IRQ codepath - wave->nextTimeLowCycles = MicrosecondsToCycles(timeLowUS) - 70; // Take out some time for IRQ codepath - wave->timeLeftCycles = MicrosecondsToCycles(runTimeUS); - if (!wave->enabled) { - wave->state = 0; - // Actually set the pin high or low in the IRQ service to guarantee times - wave->nextServiceCycle = GetCycleCount() + MicrosecondsToCycles(1); - wave->enabled = 1; - if (!timerRunning) { - initTimer(); - } - ReloadTimer(MicrosecondsToCycles(1)); // Cause an interrupt post-haste + Waveform *wave = &waveform[pin]; + // Adjust to shave off some of the IRQ time, approximately + wave->nextTimeHighCycles = microsecondsToClockCycles(timeHighUS); + wave->nextTimeLowCycles = microsecondsToClockCycles(timeLowUS); + wave->expiryCycle = runTimeUS ? GetCycleCount() + microsecondsToClockCycles(runTimeUS) : 0; + if (runTimeUS && !wave->expiryCycle) { + wave->expiryCycle = 1; // expiryCycle==0 means no timeout, so avoid setting it } - // Re-enable interrupts here since we're done with the update - ets_intr_unlock(); + uint32_t mask = 1<nextServiceCycle = GetCycleCount() + microsecondsToClockCycles(1); + waveformToEnable |= mask; + if (!timerRunning) { + initTimer(); + timer1_write(microsecondsToClockCycles(10)); + } else { + // Ensure timely service.... + if (T1L > microsecondsToClockCycles(10)) { + timer1_write(microsecondsToClockCycles(10)); + } + } + while (waveformToEnable) { + delay(0); // Wait for waveform to update + } + } return true; } +// Speed critical bits +#pragma GCC optimize ("O2") +// Normally would not want two copies like this, but due to different +// optimization levels the inline attribute gets lost if we try the +// other version. + +static inline ICACHE_RAM_ATTR uint32_t GetCycleCountIRQ() { + uint32_t ccount; + __asm__ __volatile__("rsr %0,ccount":"=a"(ccount)); + return ccount; +} + +static inline ICACHE_RAM_ATTR uint32_t min_u32(uint32_t a, uint32_t b) { + if (a < b) { + return a; + } + return b; +} + // Stops a waveform on a pin int ICACHE_RAM_ATTR stopWaveform(uint8_t pin) { // Can't possibly need to stop anything if there is no timer active if (!timerRunning) { return false; } - - for (size_t i = 0; i < countof(waveform); i++) { - if (!waveform[i].enabled) { - continue; // Skip fast to next one, can't need to stop this one since it's not running - } - if (((pin == 16) && waveform[i].gpio16Mask) || ((pin != 16) && (waveform[i].gpioMask == 1<0 - // We're also doing that, so even if an IRQ occurred it would still stay as 0. - waveform[i].enabled = 0; - int cnt = timer1CB ? 1 : 0; - for (size_t i = 0; (cnt == 0) && (i < countof(waveform)); i++) { - cnt += waveform[i].enabled ? 1 : 0; - } - if (!cnt) { - deinitTimer(); - } - return true; - } + // If user sends in a pin >16 but <32, this will always point to a 0 bit + // If they send >=32, then the shift will result in 0 and it will also return false + uint32_t mask = 1< microsecondsToClockCycles(10)) { + timer1_write(microsecondsToClockCycles(10)); + } + while (waveformToDisable) { + /* no-op */ // Can't delay() since stopWaveform may be called from an IRQ + } + if (!waveformEnabled && !timer1CB) { + deinitTimer(); + } + return true; } +// The SDK and hardware take some time to actually get to our NMI code, so +// decrement the next IRQ's timer value by a bit so we can actually catch the +// real CPU cycle counter we want for the waveforms. +#if F_CPU == 80000000 + #define DELTAIRQ (microsecondsToClockCycles(3)) +#else + #define DELTAIRQ (microsecondsToClockCycles(2)) +#endif + + static ICACHE_RAM_ATTR void timer1Interrupt() { - uint32_t nextEventCycles; - #if F_CPU == 160000000 - uint8_t cnt = 20; - #else - uint8_t cnt = 10; - #endif + // Optimize the NMI inner loop by keeping track of the min and max GPIO that we + // are generating. In the common case (1 PWM) these may be the same pin and + // we can avoid looking at the other pins. + static int startPin = 0; + static int endPin = 0; - do { - nextEventCycles = MicrosecondsToCycles(MAXIRQUS); - for (size_t i = 0; i < countof(waveform); i++) { - Waveform *wave = &waveform[i]; - uint32_t now; + uint32_t nextEventCycles = microsecondsToClockCycles(MAXIRQUS); + uint32_t timeoutCycle = GetCycleCountIRQ() + microsecondsToClockCycles(14); - // If it's not on, ignore! - if (!wave->enabled) { - continue; - } - - // Check for toggles - now = GetCycleCount(); - int32_t cyclesToGo = wave->nextServiceCycle - now; - if (cyclesToGo < 0) { - wave->state = !wave->state; - if (wave->state) { - SetGPIO(wave->gpioMask); - if (wave->gpio16Mask) { - GP16O |= wave->gpio16Mask; // GPIO16 write slow as it's RMW - } - wave->nextServiceCycle = now + wave->nextTimeHighCycles; - nextEventCycles = min_u32(nextEventCycles, wave->nextTimeHighCycles); - } else { - ClearGPIO(wave->gpioMask); - if (wave->gpio16Mask) { - GP16O &= ~wave->gpio16Mask; - } - wave->nextServiceCycle = now + wave->nextTimeLowCycles; - nextEventCycles = min_u32(nextEventCycles, wave->nextTimeLowCycles); - } - } else { - uint32_t deltaCycles = wave->nextServiceCycle - now; - nextEventCycles = min_u32(nextEventCycles, deltaCycles); - } - } - } while (--cnt && (nextEventCycles < MicrosecondsToCycles(4))); - - uint32_t curCycleCount = GetCycleCount(); - uint32_t deltaCycles = curCycleCount - lastCycleCount; - lastCycleCount = curCycleCount; - - // Check for timed-out waveforms out of the high-frequency toggle loop - for (size_t i = 0; i < countof(waveform); i++) { - Waveform *wave = &waveform[i]; - if (wave->enabled && wave->timeLeftCycles) { - // Check for unsigned underflow with new > old - if (deltaCycles >= wave->timeLeftCycles) { - // Done, remove! - wave->enabled = false; - ClearGPIO(wave->gpioMask); - GP16O &= ~wave->gpio16Mask; - } else { - uint32_t newTimeLeftCycles = wave->timeLeftCycles - deltaCycles; - wave->timeLeftCycles = newTimeLeftCycles; - } - } + if (waveformToEnable || waveformToDisable) { + // Handle enable/disable requests from main app. + waveformEnabled = (waveformEnabled & ~waveformToDisable) | waveformToEnable; // Set the requested waveforms on/off + waveformState &= ~waveformToEnable; // And clear the state of any just started + waveformToEnable = 0; + waveformToDisable = 0; + // Find the first GPIO being generated by checking GCC's find-first-set (returns 1 + the bit of the first 1 in an int32_t) + startPin = __builtin_ffs(waveformEnabled) - 1; + // Find the last bit by subtracting off GCC's count-leading-zeros (no offset in this one) + endPin = 32 - __builtin_clz(waveformEnabled); } + bool done = false; + if (waveformEnabled) { + do { + nextEventCycles = microsecondsToClockCycles(MAXIRQUS); + for (int i = startPin; i <= endPin; i++) { + uint32_t mask = 1<expiryCycle) { + int32_t expiryToGo = wave->expiryCycle - now; + if (expiryToGo < 0) { + // Done, remove! + waveformEnabled &= ~mask; + if (i == 16) { + GP16O &= ~1; + } else { + ClearGPIO(mask); + } + continue; + } + } + + // Check for toggles + int32_t cyclesToGo = wave->nextServiceCycle - now; + if (cyclesToGo < 0) { + waveformState ^= mask; + if (waveformState & mask) { + if (i == 16) { + GP16O |= 1; // GPIO16 write slow as it's RMW + } else { + SetGPIO(mask); + } + wave->nextServiceCycle = now + wave->nextTimeHighCycles; + nextEventCycles = min_u32(nextEventCycles, wave->nextTimeHighCycles); + } else { + if (i == 16) { + GP16O &= ~1; // GPIO16 write slow as it's RMW + } else { + ClearGPIO(mask); + } + wave->nextServiceCycle = now + wave->nextTimeLowCycles; + nextEventCycles = min_u32(nextEventCycles, wave->nextTimeLowCycles); + } + } else { + uint32_t deltaCycles = wave->nextServiceCycle - now; + nextEventCycles = min_u32(nextEventCycles, deltaCycles); + } + } + + // Exit the loop if we've hit the fixed runtime limit or the next event is known to be after that timeout would occur + uint32_t now = GetCycleCountIRQ(); + int32_t cycleDeltaNextEvent = timeoutCycle - (now + nextEventCycles); + int32_t cyclesLeftTimeout = timeoutCycle - now; + done = (cycleDeltaNextEvent < 0) || (cyclesLeftTimeout < 0); + } while (!done); + } // if (waveformEnabled) + if (timer1CB) { nextEventCycles = min_u32(nextEventCycles, timer1CB()); } - #if F_CPU == 160000000 - if (nextEventCycles <= 5 * MicrosecondsToCycles(1)) { - nextEventCycles = MicrosecondsToCycles(1) / 2; - } else { - nextEventCycles -= 5 * MicrosecondsToCycles(1); + if (nextEventCycles < microsecondsToClockCycles(10)) { + nextEventCycles = microsecondsToClockCycles(10); } - nextEventCycles = nextEventCycles >> 1; - #else - if (nextEventCycles <= 6 * MicrosecondsToCycles(1)) { - nextEventCycles = MicrosecondsToCycles(1) / 2; - } else { - nextEventCycles -= 6 * MicrosecondsToCycles(1); - } - #endif + nextEventCycles -= DELTAIRQ; - ReloadTimer(nextEventCycles); + // Do it here instead of global function to save time and because we know it's edge-IRQ +#if F_CPU == 160000000 + T1L = nextEventCycles >> 1; // Already know we're in range by MAXIRQUS +#else + T1L = nextEventCycles; // Already know we're in range by MAXIRQUS +#endif + TEIE |= TEIE1; // Edge int enable }