mirror of
https://github.com/esp8266/Arduino.git
synced 2025-04-19 23:22:16 +03:00
* Correct millis() drift, issue 3078 * Add 'test_millis_mm.ino' runtime benchmark * Eliminate 'punning' warning Add union 'acc' in millis() to eliminate 'punning' compiler warning * Correct minor typo * Eliminate 'punning' warning Add union 'acc' to eliminate 'punning' compiler warning in 'millis_test'_DEBUG() and 'millis_test()'
This commit is contained in:
parent
5c0a57ff6d
commit
3934d10f21
@ -61,11 +61,133 @@ void micros_overflow_tick(void* arg) {
|
|||||||
micros_at_last_overflow_tick = m;
|
micros_at_last_overflow_tick = m;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long ICACHE_RAM_ATTR millis() {
|
//---------------------------------------------------------------------------
|
||||||
|
// millis() 'magic multiplier' approximation
|
||||||
|
//
|
||||||
|
// This function corrects the cumlative (296us / usec overflow) drift
|
||||||
|
// seen in the orignal 'millis()' function.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// 'm' - 32-bit usec counter, 0 <= m <= 0xFFFFFFFF
|
||||||
|
// 'c' - 32-bit usec overflow counter 0 <= c < 0x00400000
|
||||||
|
// Output:
|
||||||
|
// Returns milliseconds in modulo 0x1,0000,0000 (0 to 0xFFFFFFFF)
|
||||||
|
//
|
||||||
|
// Notes:
|
||||||
|
//
|
||||||
|
// 1) This routine approximates the 64-bit integer division,
|
||||||
|
//
|
||||||
|
// quotient = ( 2^32 c + m ) / 1000,
|
||||||
|
//
|
||||||
|
// through the use of 'magic' multipliers. A slow division is replaced by
|
||||||
|
// a faster multiply using a scaled multiplicative inverse of the divisor:
|
||||||
|
//
|
||||||
|
// quotient =~ ( 2^32 c + m ) * k, where k = Ceiling[ 2^n / 1000 ]
|
||||||
|
//
|
||||||
|
// The precision difference between multiplier and divisor sets the
|
||||||
|
// upper-bound of the dividend which can be successfully divided.
|
||||||
|
//
|
||||||
|
// For this application, n = 64, and the divisor (1000) has 10-bits of
|
||||||
|
// precision. This sets the dividend upper-bound to (64 - 10) = 54 bits,
|
||||||
|
// and that of 'c' to (54 - 32) = 22 bits. This corresponds to a value
|
||||||
|
// for 'c' = 0x0040,0000 , or +570 years of usec counter overflows.
|
||||||
|
//
|
||||||
|
// 2) A distributed multiply with offset-summing is used find k( 2^32 c + m ):
|
||||||
|
//
|
||||||
|
// prd = (2^32 kh + kl) * ( 2^32 c + m )
|
||||||
|
// = 2^64 kh c + 2^32 kl c + 2^32 kh m + kl m
|
||||||
|
// (d) (c) (b) (a)
|
||||||
|
//
|
||||||
|
// Graphically, the offset-sums align in little endian like this:
|
||||||
|
// LS -> MS
|
||||||
|
// 32 64 96 128
|
||||||
|
// | a[-1] | a[0] | a[1] | a[2] |
|
||||||
|
// | m kl | 0 | 0 | a[-1] not needed
|
||||||
|
// | | m kh | |
|
||||||
|
// | | c kl | | a[1] holds the result
|
||||||
|
// | | | c kh | a[2] can be discarded
|
||||||
|
//
|
||||||
|
// As only the high-word of 'm kl' and low-word of 'c kh' contribute to the
|
||||||
|
// overall result, only (2) 32-bit words are needed for the accumulator.
|
||||||
|
//
|
||||||
|
// 3) As C++ does not intrinsically test for addition overflows, one must
|
||||||
|
// code specifically to detect them. This approximation skips these
|
||||||
|
// overflow checks for speed, hence the sum,
|
||||||
|
//
|
||||||
|
// highword( m kl ) + m kh + c kl < (2^64-1), MUST NOT OVERFLOW.
|
||||||
|
//
|
||||||
|
// To meet this criteria, not only do we have to pick 'k' to achieve our
|
||||||
|
// desired precision, we also have to split 'k' appropriately to avoid
|
||||||
|
// any addition overflows.
|
||||||
|
//
|
||||||
|
// 'k' should be also chosen to align the various products on byte
|
||||||
|
// boundaries to avoid any 64-bit shifts before additions, as they incur
|
||||||
|
// major time penalties. The 'k' chosen for this specific division by 1000
|
||||||
|
// was picked primarily to avoid shifts as well as for precision.
|
||||||
|
//
|
||||||
|
// For the reasons list above, this routine is NOT a general one.
|
||||||
|
// Changing divisors could break the overflow requirement and force
|
||||||
|
// picking a 'k' split which requires shifts before additions.
|
||||||
|
//
|
||||||
|
// ** Test THOROUGHLY after making changes **
|
||||||
|
//
|
||||||
|
// 4) Results of time benchmarks run on an ESP8266 Huzzah feather are:
|
||||||
|
//
|
||||||
|
// usec x Orig Comment
|
||||||
|
// Orig: 3.18 1.00 Original code
|
||||||
|
// Corr: 13.21 4.15 64-bit reference code
|
||||||
|
// Test: 4.60 1.45 64-bit magic multiply, 4x32
|
||||||
|
//
|
||||||
|
// The magic multiplier routine runs ~3x faster than the reference. Execution
|
||||||
|
// times can vary considerably with the numbers being multiplied, so one
|
||||||
|
// should derate this factor to around 2x, worst case.
|
||||||
|
//
|
||||||
|
// Reference function: corrected millis(), 64-bit arithmetic,
|
||||||
|
// truncated to 32-bits by return
|
||||||
|
// unsigned long ICACHE_RAM_ATTR millis_corr_DEBUG( void )
|
||||||
|
// {
|
||||||
|
// // Get usec system time, usec overflow conter
|
||||||
|
// ......
|
||||||
|
// return ( (c * 4294967296 + m) / 1000 ); // 64-bit division is SLOW
|
||||||
|
// } //millis_corr
|
||||||
|
//
|
||||||
|
// 5) See this link for a good discussion on magic multipliers:
|
||||||
|
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
|
||||||
|
//
|
||||||
|
|
||||||
|
#define MAGIC_1E3_wLO 0x4bc6a7f0 // LS part
|
||||||
|
#define MAGIC_1E3_wHI 0x00418937 // MS part, magic multiplier
|
||||||
|
|
||||||
|
unsigned long ICACHE_RAM_ATTR millis()
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
uint64_t q; // Accumulator, 64-bit, little endian
|
||||||
|
uint32_t a[2]; // ..........., 32-bit segments
|
||||||
|
} acc;
|
||||||
|
acc.a[1] = 0; // Zero high-acc
|
||||||
|
|
||||||
|
// Get usec system time, usec overflow counter
|
||||||
uint32_t m = system_get_time();
|
uint32_t m = system_get_time();
|
||||||
uint32_t c = micros_overflow_count + ((m < micros_at_last_overflow_tick) ? 1 : 0);
|
uint32_t c = micros_overflow_count +
|
||||||
return c * 4294967 + m / 1000;
|
((m < micros_at_last_overflow_tick) ? 1 : 0);
|
||||||
}
|
|
||||||
|
// (a) Init. low-acc with high-word of 1st product. The right-shift
|
||||||
|
// falls on a byte boundary, hence is relatively quick.
|
||||||
|
|
||||||
|
acc.q = ( (uint64_t)( m * (uint64_t)MAGIC_1E3_wLO ) >> 32 );
|
||||||
|
|
||||||
|
// (b) Offset sum, low-acc
|
||||||
|
acc.q += ( m * (uint64_t)MAGIC_1E3_wHI );
|
||||||
|
|
||||||
|
// (c) Offset sum, low-acc
|
||||||
|
acc.q += ( c * (uint64_t)MAGIC_1E3_wLO );
|
||||||
|
|
||||||
|
// (d) Truncated sum, high-acc
|
||||||
|
acc.a[1] += (uint32_t)( c * (uint64_t)MAGIC_1E3_wHI );
|
||||||
|
|
||||||
|
return ( acc.a[1] ); // Extract result, high-acc
|
||||||
|
|
||||||
|
} //millis
|
||||||
|
|
||||||
unsigned long ICACHE_RAM_ATTR micros() {
|
unsigned long ICACHE_RAM_ATTR micros() {
|
||||||
return system_get_time();
|
return system_get_time();
|
||||||
|
491
tests/device/test_millis_mm/test_millis_mm.ino
Normal file
491
tests/device/test_millis_mm/test_millis_mm.ino
Normal file
@ -0,0 +1,491 @@
|
|||||||
|
//
|
||||||
|
// Millis() Runtime Benchmarke
|
||||||
|
//
|
||||||
|
// Code to determine the runtime in 'usec' of various millis()
|
||||||
|
// functions.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <Arduino.h>
|
||||||
|
#include <ESP8266WiFi.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <BSTest.h>
|
||||||
|
|
||||||
|
// Include API-Headers
|
||||||
|
extern "C" { // SDK functions for Arduino IDE access
|
||||||
|
#include "osapi.h"
|
||||||
|
#include "user_interface.h"
|
||||||
|
#include "espconn.h"
|
||||||
|
} //end, 'extern C'
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Globals
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Here // In 'core_esp8266_wiring.c'
|
||||||
|
static os_timer_t us_ovf_timer; // 'micros_overflow_timer'
|
||||||
|
static uint32_t us_cnt = 0; // 'm' in 'micros_overflow_tick()'
|
||||||
|
static uint32_t us_ovflow = 0; // 'micros_overflow_count'
|
||||||
|
static uint32_t us_at_last_ovf = 0; // 'micros_at_last_overflow_tick'
|
||||||
|
|
||||||
|
static bool fixed_systime = false; // Testing vars
|
||||||
|
static bool debugf = false;
|
||||||
|
static uint32_t us_systime = 0;
|
||||||
|
static float nsf = 0; // Normalization factor
|
||||||
|
static uint32_t cntref = 0; // Ref. comparision count
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Interrupt code lifted directly from "cores/core_esp8266_wiring.c",
|
||||||
|
// with some variable renaming
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Callback for usec counter overflow timer interrupt
|
||||||
|
void us_overflow_tick ( void* arg )
|
||||||
|
{
|
||||||
|
us_cnt = system_get_time();
|
||||||
|
|
||||||
|
// Check for usec counter overflow
|
||||||
|
if ( us_cnt < us_at_last_ovf ) {
|
||||||
|
++us_ovflow;
|
||||||
|
} //end-if
|
||||||
|
us_at_last_ovf = us_cnt;
|
||||||
|
|
||||||
|
} //us_overflow_tick
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
#define REPEAT 1
|
||||||
|
#define ONCE 0
|
||||||
|
|
||||||
|
void us_count_init ( void )
|
||||||
|
{
|
||||||
|
os_timer_setfn( &us_ovf_timer, (os_timer_func_t*)&us_overflow_tick, 0 );
|
||||||
|
os_timer_arm( &us_ovf_timer, 60000, REPEAT );
|
||||||
|
|
||||||
|
} //us_count_init
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Wrapper(s) for our benchmark
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Set a fixed value of usec system time
|
||||||
|
void set_systime ( uint32_t usec )
|
||||||
|
{
|
||||||
|
us_systime = usec;
|
||||||
|
} //set_systime
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Wrapper to return a fixed system time
|
||||||
|
uint32_t system_get_timeA ( void )
|
||||||
|
{
|
||||||
|
return ( fixed_systime ? us_systime : system_get_time() );
|
||||||
|
} //system_get_timeA
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Functions to be tested
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Print integer list as hex
|
||||||
|
void viewhex( uint16_t *p, uint8_t n )
|
||||||
|
{
|
||||||
|
Serial.print( "0x" );
|
||||||
|
for ( uint8_t i = 0; i < n; i++ )
|
||||||
|
{
|
||||||
|
Serial.printf( "%04X ", p[ (n - 1) - i ] );
|
||||||
|
}
|
||||||
|
|
||||||
|
} //viewhex
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Support routine for 'millis_test_DEBUG()'
|
||||||
|
// Print accumulator value along interm summed into it
|
||||||
|
void view_accsum ( const char *desc, uint16_t *acc, uint16_t *itrm )
|
||||||
|
{
|
||||||
|
Serial.print( "acc: " );
|
||||||
|
viewhex( acc, 4 );
|
||||||
|
Serial.printf( " %s = ", desc );
|
||||||
|
viewhex( itrm, 4 );
|
||||||
|
Serial.println();
|
||||||
|
|
||||||
|
} //view_accsum
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// FOR BENCHTEST
|
||||||
|
// Original millis() function
|
||||||
|
unsigned long ICACHE_RAM_ATTR millis_orig ( void )
|
||||||
|
{
|
||||||
|
// Get usec system time, usec overflow conter
|
||||||
|
uint32_t m = system_get_time();
|
||||||
|
uint32_t c = us_ovflow + ((m < us_at_last_ovf) ? 1 : 0);
|
||||||
|
|
||||||
|
return ( (c * 4294967) + m / 1000 );
|
||||||
|
|
||||||
|
} //millis_orig
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// FOR DEBUG
|
||||||
|
// Corrected millis(), 64-bit arithmetic gold standard
|
||||||
|
// truncated to 32-bits by return
|
||||||
|
unsigned long ICACHE_RAM_ATTR millis_corr_DEBUG( void )
|
||||||
|
{
|
||||||
|
// Get usec system time, usec overflow conter
|
||||||
|
uint32_t m = system_get_timeA(); // DEBUG
|
||||||
|
uint32_t c = us_ovflow + ((m < us_at_last_ovf) ? 1 : 0);
|
||||||
|
|
||||||
|
return ( (c * 4294967296 + m) / 1000 );
|
||||||
|
|
||||||
|
} //millis_corr_DEBUG
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// FOR BENCHMARK
|
||||||
|
unsigned long ICACHE_RAM_ATTR millis_corr ( void )
|
||||||
|
{
|
||||||
|
// Get usec system time, usec overflow conter
|
||||||
|
uint32_t m = system_get_time();
|
||||||
|
uint32_t c = us_ovflow + ((m < us_at_last_ovf) ? 1 : 0);
|
||||||
|
|
||||||
|
return ( (c * 4294967296 + m) / 1000 );
|
||||||
|
|
||||||
|
} //millis_corr
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// FOR DEBUG
|
||||||
|
// millis() 'magic multiplier' approximation
|
||||||
|
//
|
||||||
|
// This function corrects the cumlative (296us / usec overflow) drift
|
||||||
|
// seen in the orignal 'millis()' function.
|
||||||
|
//
|
||||||
|
// Input:
|
||||||
|
// 'm' - 32-bit usec counter, 0 <= m <= 0xFFFFFFFF
|
||||||
|
// 'c' - 32-bit usec overflow counter 0 <= c < 0x00400000
|
||||||
|
// Output:
|
||||||
|
// Returns milliseconds in modulo 0x1,0000,0000 (0 to 0xFFFFFFFF)
|
||||||
|
//
|
||||||
|
// Notes:
|
||||||
|
//
|
||||||
|
// 1) This routine approximates the 64-bit integer division,
|
||||||
|
//
|
||||||
|
// quotient = ( 2^32 c + m ) / 1000,
|
||||||
|
//
|
||||||
|
// through the use of 'magic' multipliers. A slow division is replaced by
|
||||||
|
// a faster multiply using a scaled multiplicative inverse of the divisor:
|
||||||
|
//
|
||||||
|
// quotient =~ ( 2^32 c + m ) * k, where k = Ceiling[ 2^n / 1000 ]
|
||||||
|
//
|
||||||
|
// The precision difference between multiplier and divisor sets the
|
||||||
|
// upper-bound of the dividend which can be successfully divided.
|
||||||
|
//
|
||||||
|
// For this application, n = 64, and the divisor (1000) has 10-bits of
|
||||||
|
// precision. This sets the dividend upper-bound to (64 - 10) = 54 bits,
|
||||||
|
// and that of 'c' to (54 - 32) = 22 bits. This corresponds to a value
|
||||||
|
// for 'c' = 0x0040,0000 , or +570 years of usec counter overflows.
|
||||||
|
//
|
||||||
|
// 2) A distributed multiply with offset-summing is used find k( 2^32 c + m ):
|
||||||
|
//
|
||||||
|
// prd = (2^32 kh + kl) * ( 2^32 c + m )
|
||||||
|
// = 2^64 kh c + 2^32 kl c + 2^32 kh m + kl m
|
||||||
|
// (d) (c) (b) (a)
|
||||||
|
//
|
||||||
|
// Graphically, the offset-sums align in little endian like this:
|
||||||
|
// LS -> MS
|
||||||
|
// 32 64 96 128
|
||||||
|
// | a[-1] | a[0] | a[1] | a[2] |
|
||||||
|
// | m kl | 0 | 0 | a[-1] not needed
|
||||||
|
// | | m kh | |
|
||||||
|
// | | c kl | | a[1] holds the result
|
||||||
|
// | | | c kh | a[2] can be discarded
|
||||||
|
//
|
||||||
|
// As only the high-word of 'm kl' and low-word of 'c kh' contribute to the
|
||||||
|
// overall result, only (2) 32-bit words are needed for the accumulator.
|
||||||
|
//
|
||||||
|
// 3) As C++ does not intrinsically test for addition overflows, one must
|
||||||
|
// code specifically to detect them. This approximation skips these
|
||||||
|
// overflow checks for speed, hence the sum,
|
||||||
|
//
|
||||||
|
// highword( m kl ) + m kh + c kl < (2^64-1), MUST NOT OVERFLOW.
|
||||||
|
//
|
||||||
|
// To meet this criteria, not only do we have to pick 'k' to achieve our
|
||||||
|
// desired precision, we also have to split 'k' appropriately to avoid
|
||||||
|
// any addition overflows.
|
||||||
|
//
|
||||||
|
// 'k' should be also chosen to align the various products on byte
|
||||||
|
// boundaries to avoid any 64-bit shifts before additions, as they incur
|
||||||
|
// major time penalties. The 'k' chosen for this specific division by 1000
|
||||||
|
// was picked primarily to avoid shifts as well as for precision.
|
||||||
|
//
|
||||||
|
// For the reasons list above, this routine is NOT a general one.
|
||||||
|
// Changing divisors could break the overflow requirement and force
|
||||||
|
// picking a 'k' split which requires shifts before additions.
|
||||||
|
//
|
||||||
|
// ** Test THOROUGHLY after making changes **
|
||||||
|
//
|
||||||
|
// 4) Results of time benchmarks run on an ESP8266 Huzzah feather are:
|
||||||
|
//
|
||||||
|
// usec x Orig Comment
|
||||||
|
// Orig: 3.18 1.00 Original code
|
||||||
|
// Corr: 13.21 4.15 64-bit reference code
|
||||||
|
// Test: 4.60 1.45 64-bit magic multiply, 4x32
|
||||||
|
//
|
||||||
|
// The magic multiplier routine runs ~3x faster than the reference. Execution
|
||||||
|
// times can vary considerably with the numbers being multiplied, so one
|
||||||
|
// should derate this factor to around 2x, worst case.
|
||||||
|
//
|
||||||
|
// Reference function: corrected millis(), 64-bit arithmetic,
|
||||||
|
// truncated to 32-bits by return
|
||||||
|
// unsigned long ICACHE_RAM_ATTR millis_corr_DEBUG( void )
|
||||||
|
// {
|
||||||
|
// // Get usec system time, usec overflow conter
|
||||||
|
// ......
|
||||||
|
// return ( (c * 4294967296 + m) / 1000 ); // 64-bit division is SLOW
|
||||||
|
// } //millis_corr
|
||||||
|
//
|
||||||
|
// 5) See this link for a good discussion on magic multipliers:
|
||||||
|
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
|
||||||
|
//
|
||||||
|
|
||||||
|
#define M_USEC_MAX 0xFFFFFFFF
|
||||||
|
#define C_COUNT_MAX 0x00400000
|
||||||
|
|
||||||
|
#define MAGIC_1E3_wLO 0x4bc6a7f0 // LS part
|
||||||
|
#define MAGIC_1E3_wHI 0x00418937 // MS part, magic multiplier
|
||||||
|
|
||||||
|
unsigned long ICACHE_RAM_ATTR millis_test_DEBUG ( void )
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
uint64_t q; // Accumulator, 64-bit, little endian
|
||||||
|
uint32_t a[2]; // ..........., 32-bit segments
|
||||||
|
} acc;
|
||||||
|
acc.a[1] = 0; // Zero high-acc
|
||||||
|
|
||||||
|
uint64_t prd; // Interm product
|
||||||
|
|
||||||
|
// Get usec system time, usec overflow counter
|
||||||
|
uint32_t m = system_get_timeA();
|
||||||
|
uint32_t c = us_ovflow + ((m < us_at_last_ovf) ? 1 : 0);
|
||||||
|
|
||||||
|
// DEBUG: Show input vars
|
||||||
|
if ( debugf )
|
||||||
|
Serial.printf( "Test m: 0x%08X c: 0x%08X\n", m, c );
|
||||||
|
|
||||||
|
// (a) Init. low-acc with high-word of 1st product. The right-shift
|
||||||
|
// falls on a byte boundary, hence is relatively quick.
|
||||||
|
|
||||||
|
acc.q = ( (prd = (uint64_t)( m * (uint64_t)MAGIC_1E3_wLO )) >> 32 );
|
||||||
|
|
||||||
|
// DEBUG: Show both accumulator and interm product
|
||||||
|
if( debugf )
|
||||||
|
view_accsum( "m kl", (uint16_t *)&acc.q, (uint16_t *)&prd );
|
||||||
|
|
||||||
|
// (b) Offset sum, low-acc
|
||||||
|
acc.q += ( prd = ( m * (uint64_t)MAGIC_1E3_wHI ) );
|
||||||
|
|
||||||
|
// DEBUG: Show both accumulator and interm product
|
||||||
|
if( debugf )
|
||||||
|
view_accsum( "m kh", (uint16_t *)&acc.q, (uint16_t *)&prd );
|
||||||
|
|
||||||
|
// (c) Offset sum, low-acc
|
||||||
|
acc.q += ( prd = ( c * (uint64_t)MAGIC_1E3_wLO ) );
|
||||||
|
|
||||||
|
// DEBUG: Show both accumulator and interm product
|
||||||
|
if( debugf )
|
||||||
|
view_accsum( "c kl", (uint16_t *)&acc.q, (uint16_t *)&prd );
|
||||||
|
|
||||||
|
// (d) Truncated sum, high-acc
|
||||||
|
acc.a[1] += (uint32_t)( prd = ( c * (uint64_t)MAGIC_1E3_wHI ) );
|
||||||
|
|
||||||
|
// DEBUG: Show both accumulator and interm product
|
||||||
|
if( debugf )
|
||||||
|
view_accsum( "c kh", (uint16_t *)&acc.q, (uint16_t *)&prd );
|
||||||
|
|
||||||
|
return ( acc.a[1] ); // Extract result, high-acc
|
||||||
|
|
||||||
|
} //millis_test_DEBUG
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// FOR BENCHTEST
|
||||||
|
unsigned long ICACHE_RAM_ATTR millis_test ( void )
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
uint64_t q; // Accumulator, 64-bit, little endian
|
||||||
|
uint32_t a[2]; // ..........., 32-bit segments
|
||||||
|
} acc;
|
||||||
|
acc.a[1] = 0; // Zero high-acc
|
||||||
|
|
||||||
|
// Get usec system time, usec overflow counter
|
||||||
|
uint32_t m = system_get_time();
|
||||||
|
uint32_t c = us_ovflow + ((m < us_at_last_ovf) ? 1 : 0);
|
||||||
|
|
||||||
|
// (a) Init. low-acc with high-word of 1st product. The right-shift
|
||||||
|
// falls on a byte boundary, hence is relatively quick.
|
||||||
|
|
||||||
|
acc.q = ( (uint64_t)( m * (uint64_t)MAGIC_1E3_wLO ) >> 32 );
|
||||||
|
|
||||||
|
// (b) Offset sum, low-acc
|
||||||
|
acc.q += ( m * (uint64_t)MAGIC_1E3_wHI );
|
||||||
|
|
||||||
|
// (c) Offset sum, low-acc
|
||||||
|
acc.q += ( c * (uint64_t)MAGIC_1E3_wLO );
|
||||||
|
|
||||||
|
// (d) Truncated sum, high-acc
|
||||||
|
acc.a[1] += (uint32_t)( c * (uint64_t)MAGIC_1E3_wHI );
|
||||||
|
|
||||||
|
return ( acc.a[1] ); // Extract result, high-acc
|
||||||
|
|
||||||
|
} //millis_test
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Execution time benchmark
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Print benchmark result
|
||||||
|
void millis_rtms_print ( const char *pream, uint32_t cntx, const char *desc )
|
||||||
|
{
|
||||||
|
Serial.print( pream );
|
||||||
|
Serial.print( nsf * (float)cntx );
|
||||||
|
Serial.print( " " );
|
||||||
|
Serial.print( (float)cntx / (float)cntref );
|
||||||
|
Serial.print( " " );
|
||||||
|
Serial.println( desc );
|
||||||
|
|
||||||
|
} //millis_rtms_print
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
void Millis_RunTimes ( void )
|
||||||
|
{
|
||||||
|
Serial.println();
|
||||||
|
Serial.println( "Millis() RunTime Benchmarks" );
|
||||||
|
|
||||||
|
uint32_t lc = 100000; // Samples
|
||||||
|
nsf = 1 / float(lc); // Normalization (global)
|
||||||
|
|
||||||
|
uint32_t bgn;
|
||||||
|
uint32_t cnto = 0, cntv = 0;
|
||||||
|
uint32_t cntcx = 0, cntc = 0;
|
||||||
|
uint32_t cntfx = 0, cntf = 0;
|
||||||
|
|
||||||
|
// Setup timer values
|
||||||
|
fixed_systime = true;
|
||||||
|
us_ovflow = C_COUNT_MAX;
|
||||||
|
us_at_last_ovf = (M_USEC_MAX - (20 * 1000000)); // Max. less 20 sec
|
||||||
|
|
||||||
|
// No printing, systime active
|
||||||
|
debugf = false; fixed_systime = false;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < lc; i++ )
|
||||||
|
{
|
||||||
|
bgn = system_get_time();
|
||||||
|
millis_orig();
|
||||||
|
cnto += system_get_time() - bgn;
|
||||||
|
|
||||||
|
bgn = system_get_time();
|
||||||
|
millis();
|
||||||
|
cntv += system_get_time() - bgn;
|
||||||
|
|
||||||
|
bgn = system_get_time();
|
||||||
|
millis_corr_DEBUG();
|
||||||
|
cntcx += system_get_time() - bgn;
|
||||||
|
|
||||||
|
bgn = system_get_time();
|
||||||
|
millis_corr();
|
||||||
|
cntc += system_get_time() - bgn;
|
||||||
|
|
||||||
|
bgn = system_get_time();
|
||||||
|
millis_test_DEBUG();
|
||||||
|
cntfx += system_get_time() - bgn;
|
||||||
|
|
||||||
|
bgn = system_get_time();
|
||||||
|
millis_test();
|
||||||
|
cntf += system_get_time() - bgn;
|
||||||
|
|
||||||
|
yield();
|
||||||
|
} //end-for
|
||||||
|
|
||||||
|
cntref = cnto; // Set global ref. count
|
||||||
|
|
||||||
|
Serial.println();
|
||||||
|
Serial.println( " usec x Orig Comment" );
|
||||||
|
|
||||||
|
millis_rtms_print( " Orig: ", cntref, "Original code" );
|
||||||
|
millis_rtms_print( " Core: ", cntv, "Current core" );
|
||||||
|
Serial.println();
|
||||||
|
|
||||||
|
millis_rtms_print( " Ref: ", cntcx, "64-bit reference code, DEBUG" );
|
||||||
|
millis_rtms_print( " Test: ", cntfx, "64-bit magic multiply, 4x32 DEBUG" );
|
||||||
|
Serial.println();
|
||||||
|
|
||||||
|
millis_rtms_print( " Ref: ", cntc, "64-bit reference code" );
|
||||||
|
millis_rtms_print( " Test: ", cntf, "64-bit magic multiply, 4x32" );
|
||||||
|
Serial.println();
|
||||||
|
|
||||||
|
Serial.println( F("*** End, Bench Test ***") );
|
||||||
|
Serial.println();
|
||||||
|
|
||||||
|
} //Millis_RunTimes
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Debug millis_test()
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
bool Debug_Millis_Test ( void )
|
||||||
|
{
|
||||||
|
uint32_t m, msc, mstx;
|
||||||
|
int32_t diff;
|
||||||
|
|
||||||
|
// Switch over to fixed system time, enable printing
|
||||||
|
fixed_systime = true; debugf = true;
|
||||||
|
|
||||||
|
us_ovflow = C_COUNT_MAX;
|
||||||
|
m = (M_USEC_MAX - (0 * 1000000));
|
||||||
|
set_systime( m );
|
||||||
|
us_at_last_ovf = m - 1; // Disables 'c' bump
|
||||||
|
|
||||||
|
// Millis() comparison, test vs. reference
|
||||||
|
Serial.println();
|
||||||
|
mstx = millis_test_DEBUG();
|
||||||
|
msc = millis_corr_DEBUG();
|
||||||
|
diff = (int32_t)(mstx - msc);
|
||||||
|
|
||||||
|
Serial.println();
|
||||||
|
Serial.println( " m Test Reference Difference" );
|
||||||
|
Serial.printf( "0x%08x 0x%08x 0x%08X %9d\n", m, mstx, msc, diff );
|
||||||
|
Serial.println();
|
||||||
|
|
||||||
|
// No printing, variable systime
|
||||||
|
debugf = false; fixed_systime = false;
|
||||||
|
|
||||||
|
return( (bool)( diff == 0 ) ); // Good test, matches reference
|
||||||
|
|
||||||
|
} //Debug_Millis_Test
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
BS_ENV_DECLARE();
|
||||||
|
|
||||||
|
void setup ()
|
||||||
|
{
|
||||||
|
Serial.begin(115200);
|
||||||
|
WiFi.mode( WIFI_OFF );
|
||||||
|
us_count_init(); // Start up timer overflow sampling
|
||||||
|
BS_RUN(Serial);
|
||||||
|
|
||||||
|
} //setup
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
void loop(void)
|
||||||
|
{
|
||||||
|
yield();
|
||||||
|
} //loop
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Test cases
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
TEST_CASE( "Millis RunTime Benchmarks", "[bs]" )
|
||||||
|
{
|
||||||
|
Millis_RunTimes();
|
||||||
|
} //testcase1
|
||||||
|
|
||||||
|
TEST_CASE( "Debug 'millis_test()' Code", "[bs]" )
|
||||||
|
{
|
||||||
|
bool ok = Debug_Millis_Test();
|
||||||
|
CHECK( ok );
|
||||||
|
} //testcase2
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
//---------------------------------------------------------------------------
|
Loading…
x
Reference in New Issue
Block a user