From e85c3e5515fd86da7de11a1b2ea47c669c77ff3a Mon Sep 17 00:00:00 2001 From: Graham Sanderson Date: Tue, 4 Feb 2025 16:19:17 -0600 Subject: [PATCH] rationalize pico_float/pico_double libraries (#2208) * on RP2350 _dcp variant now enables -msoft-float, since if you're using this at all it is likely because you don't want to use the VFP unit at all (to save stack space) * implement all float_ and double_ conversion functions in all pico_float_pico_ variants and pico_double_pico on RP2040 and RP2350 (many were missing in some combinations) * provide better granularity of what functions are wrapped in each case also marked custom_xxx_funcs_test.c as not in bazel build yet --- .../hardware_dma/include/hardware/dma.h | 2 +- src/rp2_common/pico_double/double_aeabi_dcp.S | 112 +++- .../pico_double/double_aeabi_rp2040.S | 89 +++ src/rp2_common/pico_double/double_conv_m33.S | 64 ++- src/rp2_common/pico_double/double_fma_dcp.S | 3 +- .../pico_double/include/pico/double.h | 176 +++++- src/rp2_common/pico_float/BUILD.bazel | 37 +- src/rp2_common/pico_float/CMakeLists.txt | 71 ++- src/rp2_common/pico_float/float_aeabi_dcp.S | 171 ++++-- .../pico_float/float_aeabi_rp2040.S | 29 +- .../{float_conv_m33.S => float_common_m33.S} | 49 +- src/rp2_common/pico_float/float_conv32_vfp.S | 106 ++++ .../pico_float/include/pico/float.h | 292 ++++++++-- test/pico_float_test/BUILD.bazel | 9 + test/pico_float_test/CMakeLists.txt | 27 + .../custom_double_funcs_test.c | 515 ++++++++++++++++++ .../pico_float_test/custom_float_funcs_test.c | 402 ++++++++++++++ 17 files changed, 2012 insertions(+), 142 deletions(-) rename src/rp2_common/pico_float/{float_conv_m33.S => float_common_m33.S} (85%) create mode 100644 src/rp2_common/pico_float/float_conv32_vfp.S create mode 100644 test/pico_float_test/custom_double_funcs_test.c create mode 100644 test/pico_float_test/custom_float_funcs_test.c diff --git a/src/rp2_common/hardware_dma/include/hardware/dma.h 
b/src/rp2_common/hardware_dma/include/hardware/dma.h index 8bb35ec1..94b74cb7 100644 --- a/src/rp2_common/hardware_dma/include/hardware/dma.h +++ b/src/rp2_common/hardware_dma/include/hardware/dma.h @@ -535,7 +535,7 @@ static inline void dma_channel_start(uint channel) { *\endcode * * \if rp2350_specific - * RP2350 only: Due to errata RP12350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of + * RP2350 only: Due to errata RP2350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of * the aborted channel and any chained channels prior to the abort to prevent re-triggering. * \endif * diff --git a/src/rp2_common/pico_double/double_aeabi_dcp.S b/src/rp2_common/pico_double/double_aeabi_dcp.S index 9579c70e..8e055648 100644 --- a/src/rp2_common/pico_double/double_aeabi_dcp.S +++ b/src/rp2_common/pico_double/double_aeabi_dcp.S @@ -7,7 +7,7 @@ #include "pico/asm_helper.S" #if !HAS_DOUBLE_COPROCESSOR -#error attempt to compile double_aeabi_rp2350 when there is no DCP +#error attempt to compile double_aeabi_dcp when there is no DCP #else #include "hardware/dcp_instr.inc.S" @@ -29,7 +29,7 @@ double_section WRAPPER_FUNC_NAME(\func) // ============== STATE SAVE AND RESTORE =============== -.macro saving_func type func +.macro saving_func type func, opt_label1='-', opt_label2='-' // Note we are usually 32-bit aligned already at this point, as most of the // function bodies contain exactly two 16-bit instructions: bmi and bx lr. // We want the PCMP word-aligned. 
@@ -41,6 +41,12 @@ double_section WRAPPER_FUNC_NAME(\func) push {lr} // 16-bit instruction bl generic_save_state // 32-bit instruction b 1f // 16-bit instruction +.ifnc \opt_label1,'-' +regular_func \opt_label1 +.endif +.ifnc \opt_label2,'-' +regular_func \opt_label2 +.endif // This is the actual entry point: \type\()_func \func PCMP apsr_nzcv @@ -128,53 +134,124 @@ saving_func wrapper sqrt dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12 saving_func_return -// todo not a real thing -double_wrapper_section __aeabi_dclassify -saving_func wrapper __aeabi_dclassify -@ with correct rounding +double_section dclassify +saving_func regular dclassify dcp_dclassify_m apsr_nzcv,r0,r1 saving_func_return // ============== CONVERSION FUNCTIONS =============== double_wrapper_section __aeabi_d2f -saving_func wrapper __aeabi_d2f +saving_func wrapper __aeabi_d2f double2float @ with rounding dcp_double2float_m r0,r0,r1 saving_func_return double_wrapper_section __aeabi_i2d -saving_func wrapper __aeabi_i2d +saving_func wrapper __aeabi_i2d int2double dcp_int2double_m r0,r1,r0 saving_func_return double_wrapper_section __aeabi_ui2d -saving_func wrapper __aeabi_ui2d +saving_func wrapper __aeabi_ui2d uint2double dcp_uint2double_m r0,r1,r0 saving_func_return +double_section double2fix_z +saving_func regular double2fix_z + ubfx r3, r1, #20, #11 + adds r3, r2 + beq 1f // very small; we don't care that we might make a denormal + asrs ip, r3, #11 + beq 1f + ite pl + movpl r3, #0x7ff + movsmi r3, #0 +1: + bfi r1, r3, #20, #11 + b double2int_z_entry + +double_section double2ufix +saving_func regular double2ufix_z double2ufix +double2ufix_z_entry: + ubfx r3, r1, #20, #11 + adds r3, r2 + beq 1f // very small; we don't care that we might make a denormal + asrs ip, r3, #11 + beq 1f + ite pl + lsrspl r3, r1, #20 // 0x7ff + movsmi r3, #0 +1: + bfi r1, r3, #20, #11 + b double2uint_z_entry + +double_section double2fix +saving_func regular double2fix + ubfx r3, r1, #20, #11 + cbz r3, 2f // 0 or denormal + adds r3, 
r2 + beq 1f // very small; we don't care that we might make a denormal + asrs ip, r3, #11 + beq 1f + ite pl + movpl r3, #0x7ff + movsmi r3, #0 +1: + bfi r1, r3, #20, #11 + b double2int_entry +2: + movs r0, #0 +saving_func_return + + +double_section double2int +saving_func regular double2int +double2int_entry: + lsls r2, r1, #1 + bcc double2int_z_entry // positive is ok for int64_z + lsrs r3, r2, #21 + beq double2int_z_entry // 0 or -0 or denormal is ok for int_z + + lsrs r2, #21 + adds r2, #1 + subs r2, r2, #0x400 + bcc 1f // <1 means subtract 1 + cmp r2, #31 + bge double2int_z_entry // must be an integer or maxed out + lsls r3, r1, #12 + adds r3, r3, r0, lsr #20 // r3 now has highest 32 mantissa bits + lsls r3, r2 + orrs r3, r3, r0, lsl #12 // these bits are all guaranteed to be in the fraction + beq double2int_z_entry // integer +1: + dcp_double2int_m r0,r0,r1 + subs r0, #1 +saving_func_return + double_wrapper_section __aeabi_d2iz -saving_func wrapper __aeabi_d2iz +saving_func wrapper __aeabi_d2iz double2int_z +double2int_z_entry: @ with truncation towards 0 dcp_double2int_m r0,r0,r1 + // note: this works with either saved or not saved call as it is just a `bx lr` saving_func_return double_wrapper_section __aeabi_d2uiz -saving_func wrapper __aeabi_d2uiz +saving_func wrapper __aeabi_d2uiz double2uint double2uint_z +double2uint_z_entry: @ with truncation towards 0 dcp_double2uint_m r0,r0,r1 saving_func_return -// todo not a real thing -double_wrapper_section __aeabi_d2i_r -saving_func wrapper __aeabi_d2i_r +double_section double2int_r +saving_func regular double2int_r @ with rounding dcp_double2int_r_m r0,r0,r1 saving_func_return -// todo not a real thing -double_wrapper_section __aeabi_d2ui_r -saving_func wrapper __aeabi_d2ui_r +double_section double2uint_r +saving_func regular double2uint_r @ with rounding dcp_double2uint_r_m r0,r0,r1 saving_func_return @@ -189,7 +266,6 @@ saving_func wrapper __aeabi_dcmpun saving_func_return double_wrapper_section __aeabi_dcmp - 
saving_func wrapper __aeabi_cdrcmple dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed bvs cmp_nan diff --git a/src/rp2_common/pico_double/double_aeabi_rp2040.S b/src/rp2_common/pico_double/double_aeabi_rp2040.S index 284846fd..448b2835 100644 --- a/src/rp2_common/pico_double/double_aeabi_rp2040.S +++ b/src/rp2_common/pico_double/double_aeabi_rp2040.S @@ -425,6 +425,7 @@ double_wrapper_section __aeabi_ui2d double_wrapper_section __aeabi_i2d wrapper_func __aeabi_ui2d +regular_func uint2double movs r1, #0 cmp r0, #0 bne 2f @@ -432,6 +433,7 @@ wrapper_func __aeabi_ui2d bx lr // double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion wrapper_func __aeabi_i2d +regular_func int2double asrs r1, r0, #31 eors r0, r1 subs r0, r1 @@ -506,6 +508,7 @@ regular_func double2int // unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3] double_wrapper_section __aeabi_d2uiz wrapper_func __aeabi_d2uiz +regular_func double2uint_z regular_func double2uint shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim @@ -528,11 +531,13 @@ regular_func ufix642double // double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion double_wrapper_section __aeabi_l2d wrapper_func __aeabi_l2d +regular_func int642double shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim // double FUNC_NAME(__aeabi_l2f)(long long) long long to double (double precision) conversion double_wrapper_section __aeabi_ul2d wrapper_func __aeabi_ul2d +regular_func uint642double shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim // long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3] @@ -566,22 +571,106 @@ regular_func double2int64 // unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3] double_wrapper_section __aeabi_d2ulz wrapper_func __aeabi_d2ulz +regular_func double2uint64 
+regular_func double2uint64_z shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim +double_section double2fix64_z +regular_func double2fix64_z + lsls r3, r1, #1 + bcc double2fix64 // input positive is ok for fix64 + mov ip, r2 + asrs r2, r3, #21 + beq 3f // input zero or denormal, so just return zero + adds r2, #1 + beq double2fix64 // input infinite/nan is ok for fix64 + + lsrs r3, #21 + add r3, ip + movs r2, #1 + negs r2, r2 + lsrs r2, #22 + subs r3, r2 // r3 = modified e - 0x3ff + + bcc 3f // modified input < 1.0 means result is zero + cmp r3, #52 + bge 2f // modified input must be an integer or infinite + + adds r3, #12 + mov r2, r1 + lsls r2, r2, r3 // r2 has remaining fractional mantissa bits of r1 + bne 1f // not integer as non zero fractional bits remain + subs r3, #32 + asrs r2, r3, #31 + bics r3, r3, r2 + movs r2, r0 + lsls r2, r2, r3 + bne 1f // remaining fractional bits are non-zero, so argument was not an integer +2: + // integer + mov r2, ip + b double2fix64 +3: // result is zero + movs r0, #0 + movs r1, #0 + bx lr +1: + push {lr} + mov r2, ip + bl double2fix64 + movs r2, #0 + adds r0, #1 + adcs r1, r2 + pop {pc} + double_section double2fix64 regular_func double2fix64 shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim double_section double2ufix64 regular_func double2ufix64 +regular_func double2ufix64_z shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim double_section double2fix regular_func double2fix shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim +double_section double2fix_z +regular_func double2fix_z + lsls r3, r1, #1 + asrs r3, #21 + beq 2f // input is zero or denormal + adds r3, #1 + beq 3f // input is infinite or nan + + // extract exponent again + lsls r3, r1, #1 + lsrs r3, #21 + // adjust + adds r3, r2 + ble 2f // adjusted input is zero or denormal or < 1 + lsrs r3, r3, #11 + bne 3f // adjusted input is > infinite + + lsls r2, r2, #20 // align exponent adjustment offset + adds r1, r1,
r2 // we know adjustment is safe + b double2int_z +2: + // result is zero + movs r0, #0 + bx lr +3: + movs r0, #0 + subs r0, #1 + lsrs r0, #1 + asrs r1, #31 + eors r0, r1 + bx lr + double_section double2ufix regular_func double2ufix +regular_func double2ufix_z shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim double_wrapper_section __aeabi_d2f diff --git a/src/rp2_common/pico_double/double_conv_m33.S b/src/rp2_common/pico_double/double_conv_m33.S index 606cbfc3..d927d73c 100644 --- a/src/rp2_common/pico_double/double_conv_m33.S +++ b/src/rp2_common/pico_double/double_conv_m33.S @@ -249,7 +249,69 @@ regular_func ufix2double movs r1,#0 bx r14 -double_wrapper_section conv_dtoi64 +double_section conv_dtoi64 +regular_func double2int64 + lsls r3, r1, #1 + bcc double2int64_z // input positive is ok for int64_z + cmp r3, #0xffe00000 + bcs double2int64_z // input is infinite + lsrs r3, #21 + beq 2f // input zero or denormal, means answer remains zero + sub r3, #0x3ff + cmp r3, #0 + blt 1f // input is less than 1.0 + cmp r3, #52 + bge double2int64_z // modified input must be an integer or infinite + adds r3, #12 + lsls r2, r1, r3 // r2 has remaining fractional mantissa bits of r1 + bne 1f // not integer as non zero fractional bits remain + subs r3, #32 + bics r3, r3, r3, asr #31 // map negative shift to zero + lsls r3, r0, r3 + beq double2int64_z // remaining fractional bits are 0, so argument was an integer +1: + push {lr} + bl double2int64_z + subs r0, #1 + sbcs r1, r1, #0 + pop {pc} +2: + movs r0, #0 + movs r1, #0 + bx lr + +double_section conv_dtofix64 +regular_func double2fix64 + lsls r3, r1, #1 + bcc double2fix64_z // input positive is ok for fix64_z + cmp r3, #0xffe00000 + bcs double2fix64_z // input is infinite + lsrs r3, #21 + beq 2f // input zero or denormal, means answer remains zero + sub r3, #0x3ff + adds r3, r2 + blt 1f // modified input zero or denormal, or less than 1.0 + cmp r3, #52 + bge double2fix64_z // modified input must be an integer or 
infinite + adds r3, #12 + lsls ip, r1, r3 // ip has remaining fractional mantissa bits of r1 + bne 1f // not integer as non zero fractional bits remain + subs r3, #32 + bics r3, r3, r3, asr #31 // map negative shift to zero + lsls r3, r0, r3 + beq double2fix64_z // remaining fractional bits are 0, so argument was an integer +1: + push {lr} + bl double2fix64_z + subs r0, #1 + sbcs r1, r1, #0 + pop {pc} +2: + movs r0, #0 + movs r1, #0 + bx lr + +double_wrapper_section conv_dtoi64_z @ convert double to signed int64, rounding towards 0, clamping wrapper_func __aeabi_d2lz diff --git a/src/rp2_common/pico_double/double_fma_dcp.S b/src/rp2_common/pico_double/double_fma_dcp.S index 30f669bd..bb810d0f 100644 --- a/src/rp2_common/pico_double/double_fma_dcp.S +++ b/src/rp2_common/pico_double/double_fma_dcp.S @@ -582,7 +582,7 @@ wrapper_func fma saving_func_return -double_wrapper_section __dmla +double_section fma_fast @ cf saving_func macro: but here we need to record the SP before the state save possibly changes it 1: push {lr} // 16-bit instruction @@ -592,6 +592,7 @@ double_wrapper_section __dmla @ r0:r1 m @ r2:r3 n @ [r13,#0] a +regular_func fma_fast regular_func mla mov r12,sp @ save the SP PCMP apsr_nzcv @ test the engaged flag diff --git a/src/rp2_common/pico_double/include/pico/double.h b/src/rp2_common/pico_double/include/pico/double.h index 5af8c9e4..6805078f 100644 --- a/src/rp2_common/pico_double/include/pico/double.h +++ b/src/rp2_common/pico_double/include/pico/double.h @@ -16,50 +16,153 @@ extern "C" { #endif /** \file double.h -* \defgroup pico_double pico_double +* \defgroup pico_double pico_double * * \brief Optimized double-precision floating point functions * -* (Replacement) optimized implementations are provided of the following compiler built-ins -* and math library functions: +* An application can take control of the floating point routines used in the application over and above what is provided by the compiler, +* by depending on the pico_double 
library. A user might want to do this: * -* - __aeabi_dadd, __aeabi_ddiv, __aeabi_dmul, __aeabi_drsub, __aeabi_dsub, __aeabi_cdcmpeq, __aeabi_cdrcmple, __aeabi_cdcmple, __aeabi_dcmpeq, __aeabi_dcmplt, __aeabi_dcmple, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmpun, __aeabi_i2d, __aeabi_l2d, __aeabi_ui2d, __aeabi_ul2d, __aeabi_d2iz, __aeabi_d2lz, __aeabi_d2uiz, __aeabi_d2ulz, __aeabi_d2f -* - sqrt, cos, sin, tan, atan2, exp, log, ldexp, copysign, trunc, floor, ceil, round, asin, acos, atan, sinh, cosh, tanh, asinh, acosh, atanh, exp2, log2, exp10, log10, pow,, hypot, cbrt, fmod, drem, remainder, remquo, expm1, log1p, fma -* - powint, sincos (GNU extensions) +* 1. To use optimized software implementations provided by the RP2-series device's bootrom or the SDK +* 2. To use optimized combined software/hardware implementations utilizing custom RP2-series hardware for acceleration +* 3. To control the amount of C compiler/library code bloat +* 4. To make sure no floating point is called at all * -* The following additional optimized functions are also provided: +* The pico_double library comes in three main flavors: * -* - int2double, uint2double, int642double, uint642double, fix2double, ufix2double, fix642double, ufix642double -* - double2fix, double2ufix, double2fix64, double2ufix64, double2int, double2uint, double2int64, double2uint64, double2int_z, double2int64_z, -* - exp10, sincos, powint +* 1. `pico_double_none` - all floating point operations cause a \ref panic - no double-precision floating point code is included +* 2. `pico_double_compiler` - no custom functions are provided; all double-precision floating point is handled by the C compiler/library +* 3. `pico_double_pico` - the smallest and fastest available for the platform, along with additional functionality (e.g. 
fixed point conversions) which are detailed below * -* On RP2350 the following additional functions are available; the _fast methods are faster but do not round correctly" +* The user can control which version they want (e.g. **pico_double_xxx**) by either setting the CMake global variable +* `PICO_DEFAULT_DOUBLE_IMPL=xxx`, or by using the CMake function `pico_set_double_implementation(TARGET xxx)`. Note that in the absence +* of either, pico_double_pico is used by default. * -* - ddiv_fast, sqrt_fast +* \if rp2040_specific +* On RP2040, `pico_double_pico` uses optimized hand coded implementations from the bootrom and the SDK for both +* basic double-precision floating point operations and floating point math library functions. These implementations +* are generally faster and smaller than those provided by the C compiler/library, though they don't support all the features of a fully compliant +* floating point implementation; they are however usually fine for the majority of cases +* \endif +* +* \if rp2350_specific +* On RP2350, `pico_double_pico` uses RP2350 DCP instructions (double co-processor) to implement fast versions of the basic +* arithmetic functions, and provides optimized M33 implementations of trigonometric and scientific functions.
+* These implementations are generally faster and smaller than those provided by the C compiler/library, though they don't support all the features of a fully compliant +* floating point implementation; they are however usually fine for the majority of cases +* \endif +* +* On Arm, (replacement) optimized implementations are provided for the following compiler built-ins +* and math library functions when using `pico_double_pico`: +* +* - basic arithmetic: +* +* __aeabi_dadd, __aeabi_ddiv, __aeabi_dmul, __aeabi_drsub, __aeabi_dsub +* +* - comparison: +* +* __aeabi_cdcmpeq, __aeabi_cdrcmple, __aeabi_cdcmple, __aeabi_dcmpeq, __aeabi_dcmplt, __aeabi_dcmple, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmpun +* +* - (u)int32 <-> double: +* +* __aeabi_i2d, __aeabi_ui2d, __aeabi_d2iz, __aeabi_d2uiz +* +* - (u)int64 <-> double: +* +* __aeabi_l2d, __aeabi_ul2d, __aeabi_d2lz, __aeabi_d2ulz +* +* - double -> float: +* +* __aeabi_d2f +* +* - basic trigonometric: +* +* sqrt, cos, sin, tan, atan2, exp, log +* +* - trigonometric and scientific: +* +* ldexp, copysign, trunc, floor, ceil, round, asin, acos, atan, sinh, cosh, tanh, asinh, acosh, atanh, exp2, log2, exp10, log10, pow, hypot, cbrt, fmod, drem, remainder, remquo, expm1, log1p, fma +* +* - GNU extensions: +* +* powint, sincos +* +* On Arm, the following additional optimized functions are also provided when using `pico_double_pico`: +* +* - Conversions to/from integer types: +* +* - (u)int -> double (round to nearest): +* +* int2double, uint2double, int642double, uint642double +* +* - double -> (u)int (round towards zero): +* +* double2int_z, double2uint_z, double2int64_z, double2uint64_z +* +* - double -> (u)int (round towards -infinity): +* +* double2int, double2uint, double2int64, double2uint64 +* +* - Conversions to/from fixed point integers: +* +* - (u)fix -> double (round to nearest): +* +* fix2double, ufix2double, fix642double, ufix642double +* +* - double -> (u)fix (round towards zero): +* +* double2fix_z,
double2ufix_z, double2fix64_z, double2ufix64_z +* +* - double -> (u)fix (round towards -infinity): +* +* double2fix, double2ufix, double2fix64, double2ufix64 +* +* - Even faster versions of divide and square-root functions that do not round correctly: +* +* ddiv_fast, sqrt_fast (these do not round correctly) +* +* - Faster unfused multiply and accumulate: +* +* mla (fast fma) +* +* \if rp2350_specific +* On RISC-V there is no custom double-precision floating point support, so `pico_double_pico` is equivalent to `pico_double_compiler` +* \endif */ +#if !defined(__riscv) || PICO_COMBINED_DOCS +#if PICO_COMBINED_DOCS || !LIB_PICO_DOUBLE_COMPILER double int2double(int32_t i); -double uint2double(uint32_t u); +double uint2double(uint32_t i); double int642double(int64_t i); -double uint642double(uint64_t u); +double uint642double(uint64_t i); double fix2double(int32_t m, int e); double ufix2double(uint32_t m, int e); double fix642double(int64_t m, int e); double ufix642double(uint64_t m, int e); -// These methods round towards -Infinity. -int32_t double2fix(double d, int e); -uint32_t double2ufix(double d, int e); -int64_t double2fix64(double d, int e); -uint64_t double2ufix64(double d, int e); -int32_t double2int(double d); -uint32_t double2uint(double d); -int64_t double2int64(double d); -uint64_t double2uint64(double d); +// These methods round towards 0, which IS the C way +int32_t double2int_z(double f); +int64_t double2int64_z(double f); +uint32_t double2uint_z(double f); +uint64_t double2uint64_z(double f); +int32_t double2fix_z(double f, int e); +uint32_t double2ufix_z(double f, int e); +int64_t double2fix64_z(double f, int e); +uint64_t double2ufix64_z(double f, int e); -// These methods round towards 0.
-int32_t double2int_z(double d); -int64_t double2int64_z(double d); +// These methods round towards -Infinity - which IS NOT the C way for negative numbers; +// as such the naming is not ideal, however is kept for backwards compatibility +int32_t double2int(double f); +uint32_t double2uint(double f); +int64_t double2int64(double f); +uint64_t double2uint64(double f); +int32_t double2fix(double f, int e); +uint32_t double2ufix(double f, int e); +int64_t double2fix64(double f, int e); +uint64_t double2ufix64(double f, int e); + +#endif double exp10(double x); void sincos(double x, double *sinx, double *cosx); @@ -67,8 +170,24 @@ double powint(double x, int y); #if !PICO_RP2040 || PICO_COMBINED_DOCS double ddiv_fast(double n, double d); -double sqrt_fast(double d); -double mla(double x, double y, double z); // note this is not fused +double sqrt_fast(double f); +double fma_fast(double x, double y, double z); // this is not fused +double mla(double x, double y, double z); // another name for fma_fast +#endif + +#endif + +#if LIB_PICO_DOUBLE_COMPILER || defined(__riscv) +// when using the compiler; we provide as many functions as we trivially can, though in the double case they are not optimal +static inline double int2double(int32_t i) { return (double)i; } +static inline double uint2double(uint32_t i) { return (double)i; } +static inline double int642double(int64_t i) { return (double)i; } +static inline double uint642double(uint64_t i) { return (double)i; } + +static inline int32_t double2int_z(double d) { return (int32_t)d; } +static inline int64_t double2int64_z(double d) { return (int64_t)d; } +static inline uint32_t double2uint_z(double d) { return (uint32_t)d; } +static inline uint64_t double2uint64_z(double d) { return (uint64_t)d; } #endif #ifdef __cplusplus @@ -76,4 +195,3 @@ double mla(double x, double y, double z); // note this is not fused #endif #endif - diff --git a/src/rp2_common/pico_float/BUILD.bazel b/src/rp2_common/pico_float/BUILD.bazel index
f6aadca2..8d4ab363 100644 --- a/src/rp2_common/pico_float/BUILD.bazel +++ b/src/rp2_common/pico_float/BUILD.bazel @@ -2,13 +2,16 @@ load("//bazel:defs.bzl", "compatible_with_rp2", "incompatible_with_config") package(default_visibility = ["//visibility:public"]) -_WRAP_FLOAT_AEABI_FLAGS = [ +_WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS = [ "-Wl,--wrap=__aeabi_fadd", "-Wl,--wrap=__aeabi_fdiv", "-Wl,--wrap=__aeabi_fmul", "-Wl,--wrap=__aeabi_frsub", "-Wl,--wrap=__aeabi_fsub", "-Wl,--wrap=__aeabi_cfcmpeq", +] + +_WRAP_FLOAT_AEABI_CMP_FLAGS = [ "-Wl,--wrap=__aeabi_cfrcmple", "-Wl,--wrap=__aeabi_cfcmple", "-Wl,--wrap=__aeabi_fcmpeq", @@ -17,15 +20,27 @@ _WRAP_FLOAT_AEABI_FLAGS = [ "-Wl,--wrap=__aeabi_fcmpge", "-Wl,--wrap=__aeabi_fcmpgt", "-Wl,--wrap=__aeabi_fcmpun", +] + +_WRAP_FLOAT_AEABI_CONV_32_FLAGS = [ "-Wl,--wrap=__aeabi_i2f", "-Wl,--wrap=__aeabi_l2f", "-Wl,--wrap=__aeabi_ui2f", "-Wl,--wrap=__aeabi_ul2f", +] + +_WRAP_FLOAT_AEABI_CONV_64_FLAGS = [ "-Wl,--wrap=__aeabi_f2iz", "-Wl,--wrap=__aeabi_f2lz", "-Wl,--wrap=__aeabi_f2uiz", "-Wl,--wrap=__aeabi_f2ulz", +] + +_WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS = [ "-Wl,--wrap=__aeabi_f2d", +] + +_WRAP_FLOAT_SQRTF_FLAGS = [ "-Wl,--wrap=sqrtf", ] @@ -36,13 +51,16 @@ _WRAP_FLOAT_SCI_FLAGS = [ "-Wl,--wrap=atan2f", "-Wl,--wrap=expf", "-Wl,--wrap=logf", + "-Wl,--wrap=sincosf", # gnu +] + +_WRAP_FLOAT_SCI_EXTRA_FLAGS = [ "-Wl,--wrap=ldexpf", "-Wl,--wrap=copysignf", "-Wl,--wrap=truncf", "-Wl,--wrap=floorf", "-Wl,--wrap=ceilf", "-Wl,--wrap=roundf", - "-Wl,--wrap=sincosf", # gnu "-Wl,--wrap=asinf", "-Wl,--wrap=acosf", "-Wl,--wrap=atanf", @@ -114,30 +132,31 @@ _PICO_FLOAT_IMPLS = [ ], "compatibility": incompatible_with_config("@platforms//cpu:riscv32") + ["//bazel/constraint:rp2040"], "extra_deps": [], - "linkopts": _WRAP_FLOAT_AEABI_FLAGS + _WRAP_FLOAT_SCI_FLAGS, + "linkopts": _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS + _WRAP_FLOAT_AEABI_CMP_FLAGS + _WRAP_FLOAT_AEABI_CONV_32_FLAGS + _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS + 
_WRAP_FLOAT_SQRTF_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS, }, { "name": "dcp", "srcs": [ "float_aeabi_dcp.S", - "float_conv_m33.S", + "float_common_m33.S", "float_math.c", "float_sci_m33.S", ], "compatibility": compatible_with_rp2() + incompatible_with_config("@platforms//cpu:riscv32") + incompatible_with_config("//bazel/constraint:rp2040"), "extra_deps": ["//src/rp2_common/hardware_dcp"], - "linkopts": _WRAP_FLOAT_SCI_FLAGS, + "linkopts": _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS + _WRAP_FLOAT_AEABI_CMP_FLAGS + _WRAP_FLOAT_AEABI_CONV_32_FLAGS + _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS + _WRAP_FLOAT_SQRTF_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS, }, { "name": "vfp", "srcs": [ + "float_conv32_vfp.S", "float_sci_m33_vfp.S", - "float_conv_m33.S", + "float_common_m33.S", "float_math.c", ], "compatibility": compatible_with_rp2() + incompatible_with_config("@platforms//cpu:riscv32") + incompatible_with_config("//bazel/constraint:rp2040"), "extra_deps": ["//src/rp2_common/hardware_dcp"], - "linkopts": _WRAP_FLOAT_SCI_FLAGS, + "linkopts": _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS, }, { "name": "single_hazard3", @@ -146,7 +165,7 @@ _PICO_FLOAT_IMPLS = [ ], "compatibility": compatible_with_rp2() + ["@platforms//cpu:riscv32"], "extra_deps": ["//src/rp2_common/hardware_hazard3"], - "linkopts": _WRAP_FLOAT_SCI_FLAGS, + "linkopts": _WRAP_FLOAT_SCI_EXTRA_FLAGS, }, ] @@ -184,7 +203,7 @@ cc_library( hdrs = ["include/pico/float.h"], defines = ["LIB_PICO_FLOAT_PICO=0"], includes = ["include"], - linkopts = _WRAP_FLOAT_AEABI_FLAGS + _WRAP_FLOAT_SCI_FLAGS, + linkopts = _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS + _WRAP_FLOAT_AEABI_CMP_FLAGS + _WRAP_FLOAT_AEABI_CONV_32_FLAGS + _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS + _WRAP_FLOAT_SQRTF_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS, target_compatible_with = compatible_with_rp2(), visibility = 
["//visibility:private"], deps = [ diff --git a/src/rp2_common/pico_float/CMakeLists.txt b/src/rp2_common/pico_float/CMakeLists.txt index 7d53274f..f634f094 100644 --- a/src/rp2_common/pico_float/CMakeLists.txt +++ b/src/rp2_common/pico_float/CMakeLists.txt @@ -18,13 +18,15 @@ $>,$,${PICO_DEFAULT_FLOAT_IMPL}>) function(wrap_float_functions TARGET) - cmake_parse_arguments(WRAP_FLOAT "NO_WRAP_AEABI;NO_WRAP_SCI" "" "" ${ARGN} ) - if (NOT WRAP_FLOAT_NO_WRAP_AEABI) + cmake_parse_arguments(WRAP_FLOAT "NO_AEABI_ARITHMETIC;NO_AEABI_CMP;NO_AEABI_CONV_32;NO_AEABI_CONV_64;NO_AEABI_CONV_DOUBLE;NO_SQRTF;NO_SCI;NO_SCI_EXTRA" "" "" ${ARGN} ) + if (NOT WRAP_FLOAT_NO_AEABI_ARITHMETIC) pico_wrap_function(${TARGET} __aeabi_fadd) pico_wrap_function(${TARGET} __aeabi_fdiv) pico_wrap_function(${TARGET} __aeabi_fmul) pico_wrap_function(${TARGET} __aeabi_frsub) pico_wrap_function(${TARGET} __aeabi_fsub) + endif() + if (NOT WRAP_FLOAT_NO_AEABI_CMP) pico_wrap_function(${TARGET} __aeabi_cfcmpeq) pico_wrap_function(${TARGET} __aeabi_cfrcmple) pico_wrap_function(${TARGET} __aeabi_cfcmple) @@ -34,32 +36,42 @@ pico_wrap_function(${TARGET} __aeabi_fcmpge) pico_wrap_function(${TARGET} __aeabi_fcmpgt) pico_wrap_function(${TARGET} __aeabi_fcmpun) + endif() + if (NOT WRAP_FLOAT_NO_AEABI_CONV_32) pico_wrap_function(${TARGET} __aeabi_i2f) - pico_wrap_function(${TARGET} __aeabi_l2f) pico_wrap_function(${TARGET} __aeabi_ui2f) - pico_wrap_function(${TARGET} __aeabi_ul2f) pico_wrap_function(${TARGET} __aeabi_f2iz) - pico_wrap_function(${TARGET} __aeabi_f2lz) pico_wrap_function(${TARGET} __aeabi_f2uiz) + endif() + if (NOT WRAP_FLOAT_NO_AEABI_CONV_64) + pico_wrap_function(${TARGET} __aeabi_l2f) + pico_wrap_function(${TARGET} __aeabi_ul2f) + pico_wrap_function(${TARGET} __aeabi_f2lz) pico_wrap_function(${TARGET} __aeabi_f2ulz) + endif() + if (NOT WRAP_FLOAT_NO_AEABI_CONV_DOUBLE) pico_wrap_function(${TARGET} __aeabi_f2d) + endif() + # separate as we have a direct DCP version + if (NOT WRAP_FLOAT_NO_SQRTF) 
pico_wrap_function(${TARGET} sqrtf) endif() - if (NOT WRAP_FLOAT_NO_WRAP_SCI) + if (NOT WRAP_FLOAT_NO_SCI) pico_wrap_function(${TARGET} cosf) pico_wrap_function(${TARGET} sinf) pico_wrap_function(${TARGET} tanf) pico_wrap_function(${TARGET} atan2f) pico_wrap_function(${TARGET} expf) pico_wrap_function(${TARGET} logf) - + pico_wrap_function(${TARGET} sincosf) # gnu + endif() + if (NOT WRAP_FLOAT_NO_SCI_EXTRA) pico_wrap_function(${TARGET} ldexpf) pico_wrap_function(${TARGET} copysignf) pico_wrap_function(${TARGET} truncf) pico_wrap_function(${TARGET} floorf) pico_wrap_function(${TARGET} ceilf) pico_wrap_function(${TARGET} roundf) - pico_wrap_function(${TARGET} sincosf) # gnu pico_wrap_function(${TARGET} asinf) pico_wrap_function(${TARGET} acosf) pico_wrap_function(${TARGET} atanf) @@ -93,7 +105,9 @@ ) target_link_libraries(pico_float_none INTERFACE pico_float_headers) - wrap_float_functions(pico_float_none) + wrap_float_functions(pico_float_none) # we wrap all functions + # be explicit that there should be no floating point instructions + target_compile_options(pico_float_none INTERFACE -msoft-float) pico_add_library(pico_float_pico) if (PICO_RP2040) @@ -107,21 +121,52 @@ target_link_libraries(pico_float_pico INTERFACE pico_bootrom pico_float_headers hardware_divider) elseif(NOT PICO_RISCV) pico_add_library(pico_float_pico_dcp) + # todo what functions from float_math belong in each case; should some be left to GCC on RP2350? 
target_sources(pico_float_pico_dcp INTERFACE ${CMAKE_CURRENT_LIST_DIR}/float_math.c ${CMAKE_CURRENT_LIST_DIR}/float_aeabi_dcp.S + ${CMAKE_CURRENT_LIST_DIR}/float_common_m33.S ${CMAKE_CURRENT_LIST_DIR}/float_sci_m33.S - ${CMAKE_CURRENT_LIST_DIR}/float_conv_m33.S ) - wrap_float_functions(pico_float_pico_dcp NO_WRAP_AEABI) + # NOTE the main reason for using pico_float_pico_dcp is presumably that you + # don't want to use VFP at all, so turn off compiler support, otherwise, it will inline usages + target_compile_options(pico_float_pico_dcp INTERFACE -msoft-float) + + wrap_float_functions(pico_float_pico_dcp + # we wrap all functions as we don't want to use VFP (or compiler versions) at all + #NO_AEABI_ARITHMETIC + #NO_AEABI_CMP + #NO_AEABI_CONV_32 + #NO_AEABI_CONV_64 + #NO_AEABI_CONV_DOUBLE + #NO_SQRTF + #NO_SCI + #NO_SCI_EXTRA + ) + pico_add_library(pico_float_pico_vfp) target_sources(pico_float_pico_vfp INTERFACE ${CMAKE_CURRENT_LIST_DIR}/float_math.c + ${CMAKE_CURRENT_LIST_DIR}/float_conv32_vfp.S + ${CMAKE_CURRENT_LIST_DIR}/float_common_m33.S ${CMAKE_CURRENT_LIST_DIR}/float_sci_m33_vfp.S - ${CMAKE_CURRENT_LIST_DIR}/float_conv_m33.S ) - wrap_float_functions(pico_float_pico_vfp NO_WRAP_AEABI) + wrap_float_functions(pico_float_pico_vfp + # for these 3, arguably compiler is probably inlining anyway, but use the compiler's + # version for explicit AEABI calls + NO_AEABI_ARITHMETIC + NO_AEABI_CMP + NO_AEABI_CONV_32 + #NO_AEABI_CONV_64 # we have optimized M33 versions + NO_AEABI_CONV_DOUBLE + # we don't have an optimized vfp or m33 sqrtf available + NO_SQRTF + #NO_SCI # we have optimized VFP versions + #NO_SCI_EXTRA # todo - are our versions better than what GCC provides?
+ ) + + target_link_libraries(pico_float_pico INTERFACE pico_float_pico_vfp) else() diff --git a/src/rp2_common/pico_float/float_aeabi_dcp.S b/src/rp2_common/pico_float/float_aeabi_dcp.S index 61c24091..de170d3e 100644 --- a/src/rp2_common/pico_float/float_aeabi_dcp.S +++ b/src/rp2_common/pico_float/float_aeabi_dcp.S @@ -5,15 +5,17 @@ */ #include "pico/asm_helper.S" -#if HAS_DOUBLE_COPROCESSOR + +#if !HAS_DOUBLE_COPROCESSOR +#error attempt to compile float_aeabi_dcp when there is no DCP +#else + #include "hardware/dcp_instr.inc.S" #include "hardware/dcp_canned.inc.S" pico_default_asm_setup -// todo alignment -//__pre_init __aeabi_float_init, 00020 -// factor out save/restore (there is a copy in double code) +// todo factor out save/restore (there is a copy in double code) .macro float_section name #if PICO_FLOAT_IN_RAM @@ -29,7 +31,7 @@ float_section WRAPPER_FUNC_NAME(\func) // ============== STATE SAVE AND RESTORE =============== -.macro saving_func func +.macro saving_func type func, opt_label1='-', opt_label2='-' // Note we are usually 32-bit aligned already at this point, as most of the // function bodies contain exactly two 16-bit instructions: bmi and bx lr. // We want the PCMP word-aligned. 
@@ -41,8 +43,14 @@ float_section WRAPPER_FUNC_NAME(\func) push {lr} // 16-bit instruction bl generic_save_state // 32-bit instruction b 1f // 16-bit instruction +.ifnc \opt_label1,'-' +regular_func \opt_label1 +.endif +.ifnc \opt_label2,'-' +regular_func \opt_label2 +.endif // This is the actual entry point: -wrapper_func \func +\type\()_func \func PCMP apsr_nzcv bmi 1b 1: @@ -82,115 +90,208 @@ generic_restore_state: // ============== ARITHMETIC FUNCTIONS =============== float_wrapper_section __aeabi_fadd -saving_func __aeabi_fadd +saving_func wrapper __aeabi_fadd dcp_fadd_m r0,r0,r1 saving_func_return float_wrapper_section __aeabi_fsub -saving_func __aeabi_fsub +saving_func wrapper __aeabi_fsub dcp_fsub_m r0,r0,r1 saving_func_return float_wrapper_section __aeabi_frsub -saving_func __aeabi_frsub +saving_func wrapper __aeabi_frsub dcp_fsub_m r0,r1,r0 saving_func_return float_wrapper_section __aeabi_fmul -saving_func __aeabi_fmul +saving_func wrapper __aeabi_fmul dcp_fmul_m r0,r0,r1,r0,r1 saving_func_return float_section fdiv_fast -saving_func fdiv_fast +saving_func regular fdiv_fast dcp_fdiv_fast_m r0,r0,r1,r0,r1,r2 saving_func_return float_wrapper_section __aeabi_fdiv -saving_func __aeabi_fdiv +saving_func wrapper __aeabi_fdiv @ with correct rounding dcp_fdiv_m r0,r0,r1,r0,r1,r2,r3 saving_func_return float_section sqrtf_fast -saving_func sqrtf_fast +saving_func regular sqrtf_fast dcp_fsqrt_fast_m r0,r0,r0,r1,r2,r3 saving_func_return float_wrapper_section sqrtf -saving_func sqrtf +saving_func wrapper sqrtf @ with correct rounding dcp_fsqrt_m r0,r0,r0,r1,r2,r3 saving_func_return -// todo not a real thing -float_wrapper_section __aeabi_fclassify -saving_func __aeabi_fclassify +float_section fclassify +saving_func regular fclassify dcp_fclassify_m apsr_nzcv,r0 saving_func_return // ============== CONVERSION FUNCTIONS =============== float_wrapper_section __aeabi_f2d -saving_func __aeabi_f2d +saving_func wrapper __aeabi_f2d float2double dcp_float2double_m r0,r1,r0 
saving_func_return float_wrapper_section __aeabi_i2f -saving_func __aeabi_i2f +saving_func wrapper __aeabi_i2f int2float @ with rounding dcp_int2float_m r0,r0 saving_func_return float_wrapper_section __aeabi_ui2f -saving_func __aeabi_ui2f +saving_func wrapper __aeabi_ui2f uint2float @ with rounding dcp_uint2float_m r0,r0 saving_func_return +float_section float2fix_z +regular_func float2fix_z + ubfx r2, r0, #23, #8 + cbz r2, 2f // input is zero or denormal + cmp r2, #0xff + beq 3f // input infinite or nan + adds r2, r1 + ble 2f // modified input is denormal so zero + cmp r2, #0xff + beq 3f // modified input is infinite +1: + bfi r0, r2, #23, #8 + b float2int_z_entry +2: + movs r0, #0 + bx lr +3: + mvn r1, #0x80000000 + add r0, r1, r0, lsr#31 @ so -Inf → 0x80000000, +Inf → 0x7fffffff + bx lr + float_wrapper_section __aeabi_f2iz -saving_func __aeabi_f2iz +saving_func wrapper __aeabi_f2iz float2int_z @ with truncation towards 0 +float2int_z_entry: dcp_float2int_m r0,r0 saving_func_return +float_section __aeabi_f2ufix +regular_func float2ufix +regular_func float2ufix_z + ubfx r2, r0, #23, #8 + cbz r2, 2f // input is zero or denormal + cmp r2, #0xff + beq 3f // input infinite or nan + adds r2, r1 + ble 2f // modified input is denormal so zero + cmp r2, #0xff + beq 3f // modified input is infinite +1: + bfi r0, r2, #23, #8 + b float2uint_z_entry +2: + movs r0, #0 + bx lr +3: + mvn r0, r0, asr #31 + bx lr + float_wrapper_section __aeabi_f2uiz -saving_func __aeabi_f2uiz +saving_func wrapper __aeabi_f2uiz float2uint_z float2uint @ with truncation towards 0 +float2uint_z_entry: dcp_float2uint_m r0,r0 saving_func_return -// todo not a real thing +float_section conv_f2fix +saving_func regular float2fix + ubfx r2, r0, #23, #8 + cbz r2, 2f // input is zero or denormal + cmp r2, #0xff + beq 3f // input infinite or nan + adds r2, r1 + ble 2f // modified input is denormal so zero + cmp r2, #0xff + beq 3f // modified input is infinite +1: + bfi r0, r2, #23, #8 + b float2int_entry +2: 
+ movs r0, #0 + bx lr +3: + mvn r1, #0x80000000 + add r0, r1, r0, lsr#31 @ so -Inf → 0x80000000, +Inf → 0x7fffffff + bx lr + +float_section float2int +// (not a real thing - kept because we use wrapper in saving_func) +saving_func regular float2int +float2int_entry: + lsls r1, r0, #1 + // r0 = abs(zero) => r1 = 0x00000000 + // r0 = abs(denormal) => r1 = 0x00xxxxxx + // r0 = abs(1.0f) => r1 = 0x7f000000 + // r0 = abs(inf/nan) => r1 = 0xffxxxxxx + bls float2int_z_entry // input positive or zero or -zero are ok for int_z + lsrs r1, #24 + beq float2int_z_entry // input denormal is flushed to zero anyway + subs r1, #0x7f + bcc 1f // input < 1.0f means we need to subtract 1 after conversion + // mask off all but fractional bits + lsls r2, r0, r1 + lsls r2, #9 + beq float2int_z_entry // input is integer +1: + WXFC r0, r0 + ADD0 + ADD1 + NTDC + RDIC r0 + subs r0, #1 +saving_func_return + +#if 0 // not sure these are super useful; if they are we should give them names float_wrapper_section __aeabi_f2i_r -saving_func __aeabi_f2i_r +// (not a real thing - kept because we use wrapper in saving_func) +saving_func wrapper __aeabi_f2i_r @ with rounding dcp_float2int_r_m r0,r0 saving_func_return -// todo not a real thing float_wrapper_section __aeabi_f2ui_r -saving_func __aeabi_f2ui_r +// (not a real thing - kept because we use wrapper in saving_func) +saving_func wrapper __aeabi_f2ui_r @ with rounding dcp_float2uint_r_m r0,r0 saving_func_return +#endif // ============== COMPARISON FUNCTIONS =============== float_wrapper_section __aeabi_fcmpun -saving_func __aeabi_fcmpun +saving_func wrapper __aeabi_fcmpun dcp_fcmp_m r0,r0,r1 // extract unordered bit ubfx r0, r0, #28, #1 saving_func_return float_wrapper_section __aeabi_fcmp -saving_func __aeabi_cfrcmple +saving_func wrapper __aeabi_cfrcmple dcp_fcmp_m apsr_nzcv,r1,r0 // with arguments reversed bvs cmp_nan saving_func_return // these next two can be the same function in the absence of exceptions -saving_func __aeabi_cfcmple 
+saving_func wrapper __aeabi_cfcmple dcp_fcmp_m apsr_nzcv,r0,r1 bvs cmp_nan saving_func_return @@ -198,7 +299,7 @@ saving_func __aeabi_cfcmple // It is not clear from the ABI documentation whether cfcmpeq must set the C flag // in the same way as cfcmple. If not, we could save the "bvs" below; but we // err on the side of caution. -saving_func __aeabi_cfcmpeq +saving_func wrapper __aeabi_cfcmpeq dcp_fcmp_m apsr_nzcv,r0,r1 bvs cmp_nan saving_func_return @@ -212,14 +313,14 @@ cmp_nan: saving_func_return float_wrapper_section __aeabi_fcmpeq -saving_func __aeabi_fcmpeq +saving_func wrapper __aeabi_fcmpeq dcp_fcmp_m r0,r0,r1 // extract Z ubfx r0, r0, #30, #1 saving_func_return float_wrapper_section __aeabi_fcmplt -saving_func __aeabi_fcmplt +saving_func wrapper __aeabi_fcmplt dcp_fcmp_m apsr_nzcv,r1,r0 ite hi movhi r0,#1 @@ -227,7 +328,7 @@ saving_func __aeabi_fcmplt saving_func_return float_wrapper_section __aeabi_fcmple -saving_func __aeabi_fcmple +saving_func wrapper __aeabi_fcmple dcp_fcmp_m apsr_nzcv,r1,r0 ite hs movhs r0,#1 @@ -235,7 +336,7 @@ saving_func __aeabi_fcmple saving_func_return float_wrapper_section __aeabi_fcmpge -saving_func __aeabi_fcmpge +saving_func wrapper __aeabi_fcmpge dcp_fcmp_m apsr_nzcv,r0,r1 ite hs movhs r0,#1 @@ -243,7 +344,7 @@ saving_func __aeabi_fcmpge saving_func_return float_wrapper_section __aeabi_fcmpgt -saving_func __aeabi_fcmpgt +saving_func wrapper __aeabi_fcmpgt dcp_fcmp_m apsr_nzcv,r0,r1 ite hi movhi r0,#1 diff --git a/src/rp2_common/pico_float/float_aeabi_rp2040.S b/src/rp2_common/pico_float/float_aeabi_rp2040.S index 8eb83fc2..c34f68f6 100644 --- a/src/rp2_common/pico_float/float_aeabi_rp2040.S +++ b/src/rp2_common/pico_float/float_aeabi_rp2040.S @@ -471,17 +471,36 @@ float_section float2int regular_func float2int shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim +float_section float2fix_z +regular_func float2fix_z + cmn r0, r0 + bcc float2fix + push {lr} + lsls r0, #1 + lsrs r0, #1 + bl float2ufix_z + cmp r0, #0 + 
bmi 1f + negs r0, r0 + pop {pc} +1: + movs r0, #128 + lsls r0, #24 + pop {pc} + float_section float2fix regular_func float2fix shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim float_section float2ufix regular_func float2ufix +regular_func float2ufix_z table_tail_call SF_TABLE_FLOAT2UFIX // unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3] float_wrapper_section __aeabi_f2uiz wrapper_func __aeabi_f2uiz +regular_func float2uint regular_func float2uint_z table_tail_call SF_TABLE_FLOAT2UINT @@ -530,10 +549,11 @@ wrapper_func __aeabi_f2lz regular_func float2int64_z cmn r0, r0 bcc float2int64 + movs r1, #0 +float2fix64_z_neg: push {lr} lsls r0, #1 lsrs r0, #1 - movs r1, #0 bl float2ufix64 cmp r1, #0 bmi 1f @@ -553,17 +573,24 @@ regular_func float2int64 shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim float_section float2fix64 +regular_func float2fix64_z + cmn r0, r0 + bcs float2fix64_z_neg + // fall thru + regular_func float2fix64 shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim // unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3] float_wrapper_section __aeabi_f2ulz wrapper_func __aeabi_f2ulz +regular_func float2uint64 regular_func float2uint64_z shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim float_section float2ufix64 regular_func float2ufix64 +regular_func float2ufix64_z shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim float_wrapper_section __aeabi_f2d diff --git a/src/rp2_common/pico_float/float_conv_m33.S b/src/rp2_common/pico_float/float_common_m33.S similarity index 85% rename from src/rp2_common/pico_float/float_conv_m33.S rename to src/rp2_common/pico_float/float_common_m33.S index dd47a939..491d758f 100644 --- a/src/rp2_common/pico_float/float_conv_m33.S +++ b/src/rp2_common/pico_float/float_common_m33.S @@ -241,7 +241,52 @@ regular_func ufix642float bxlo r14 b 3b -float_wrapper_section 
conv_ftoi64 +float_section conv_ftoi64 +regular_func float2int64 + lsls r1, r0, #1 + // r0 = abs(zero) => r1 = 0x00000000 + // r0 = abs(denormal) => r1 = 0x00xxxxxx + // r0 = abs(1.0f) => r1 = 0x7f000000 + // r0 = abs(inf/nan) => r1 = 0xffxxxxxx + bls float2int64_z // positive or zero or -zero are ok for int64_z + lsrs r1, #24 + subs r1, #0x7f + bcc 1f // <1 means subtract 1 + // mask off all but fractional bits + lsls r2, r0, r1 + lsls r2, #9 + beq float2int64_z // integer +1: + push {lr} + bl float2int64_z + subs r0, #1 + sbcs r1, r1, #0 + pop {pc} + +float_section conv_ftof64 +regular_func float2fix64 + lsls r2, r0, #1 + // r0 = abs(zero) => r1 = 0x00000000 + // r0 = abs(denormal) => r1 = 0x00xxxxxx + // r0 = abs(1.0f) => r1 = 0x7f000000 + // r0 = abs(inf/nan) => r1 = 0xffxxxxxx + bls float2fix64_z // positive or zero or -zero are ok for fix64_z + lsrs r2, #24 + rsbs r3, r1, #0x7f + subs r2, r3 + bcc 1f // <1 means subtract 1 + // mask off all but fractional bits + lsls r2, r0, r2 + lsls r2, #9 + beq float2fix64_z // integer +1: + push {lr} + bl float2fix64_z + subs r0, #1 + sbcs r1, r1, #0 + pop {pc} + +float_wrapper_section conv_ftoi64z @ convert float to signed int64, rounding towards 0, clamping wrapper_func __aeabi_f2lz @@ -318,7 +363,7 @@ regular_func float2uint64_z movs r1,#0 @ fall through @ convert float in r0 to unsigned fixed point in r0:r1, clamping regular_func float2ufix64 -//regular_func float2ufix64_z +regular_func float2ufix64_z subs r1,#0x96 @ remove exponent bias, compensate for mantissa length asrs r2,r0,#23 @ sign and exponent sub r3,r2,#1 diff --git a/src/rp2_common/pico_float/float_conv32_vfp.S b/src/rp2_common/pico_float/float_conv32_vfp.S new file mode 100644 index 00000000..80fb5ca2 --- /dev/null +++ b/src/rp2_common/pico_float/float_conv32_vfp.S @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2024 Raspberry Pi (Trading) Ltd. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#if !PICO_RP2040 +#include "pico/asm_helper.S" + +pico_default_asm_setup + +.macro float_section name +#if PICO_FLOAT_IN_RAM +.section RAM_SECTION_NAME(\name), "ax" +#else +.section SECTION_NAME(\name), "ax" +#endif +.endm + +float_section int2float +regular_func int2float + vmov s15, r0 + vcvt.f32.s32 s15, s15 + vmov r0, s15 + bx lr + +float_section uint2float +regular_func uint2float + vmov s15, r0 + vcvt.f32.u32 s15, s15 + vmov r0, s15 + bx lr + +float_section float2int +regular_func float2int + vmov s15, r0 + vcvtm.s32.f32 s15, s15 + vmov r0, s15 + bx lr + +float_section float2int_z +regular_func float2int_z + vmov s15, r0 + vcvt.s32.f32 s15, s15 + vmov r0, s15 + bx lr + +float_section float2uint +regular_func float2uint +regular_func float2uint_z + vmov s15, r0 + vcvt.u32.f32 s15, s15 + vmov r0, s15 + bx lr + +float_section float2fix_z +regular_func float2fix_z + ubfx r2, r0, #23, #8 + adds r2, r1 + asrs r3, r2, #8 + beq 1f + ite pl + movpl r2, #0xff + movmi r2, #0 +1: + bfi r0, r2, #23, #8 + b float2int_z + +float_section float2fix +regular_func float2fix + lsls r2, r0, #1 + // r0 = abs(zero) => r1 = 0x00000000 + // r0 = abs(denormal) => r1 = 0x00xxxxxx + // r0 = abs(1.0f) => r1 = 0x7f000000 + // r0 = abs(inf/nan) => r1 = 0xffxxxxxx + bls float2fix_z // input positive or zero or -zero are ok for fix_z + lsrs r2, #24 + beq float2fix_z // input denormal will be flushed to zero + rsbs r3, r1, #0x7f + subs r2, r3 + bcc 1f // input <1.0f means we need to subtract 1 + // mask off all but fractional bits + lsls r2, r0, r2 + lsls r2, #9 + beq float2fix_z // input is integer +1: + push {lr} + bl float2fix_z + subs r0, #1 + sbcs r1, r1, #0 + pop {pc} + +float_section float2ufix +regular_func float2ufix +regular_func float2ufix_z + ubfx r2, r0, #23, #8 + adds r2, r1 + asrs r3, r2, #8 + beq 1f + ite pl + movpl r2, #0xff + movmi r2, #0 +1: + bfi r0, r2, #23, #8 + b float2uint_z +#endif diff --git 
a/src/rp2_common/pico_float/include/pico/float.h b/src/rp2_common/pico_float/include/pico/float.h index 6cafc83e..ef95e1ec 100644 --- a/src/rp2_common/pico_float/include/pico/float.h +++ b/src/rp2_common/pico_float/include/pico/float.h @@ -21,68 +21,296 @@ extern "C" { * * \brief Optimized single-precision floating point functions * -* (Replacement) optimized implementations are provided for the following compiler built-ins -* and math library functions on Arm: +* An application can take control of the floating point routines used in the application over and above what is provided by the compiler, +* by depending on the pico_float library. A user might want to do this * -* - __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub, __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun, __aeabi_i2f, __aeabi_l2f, __aeabi_ui2f, __aeabi_ul2f, __aeabi_f2iz, __aeabi_f2lz, __aeabi_f2uiz, __aeabi_f2ulz, __aeabi_f2d, sqrtf, cosf, sinf, tanf, atan2f, expf, logf -* - ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf -* - powintf, sincosf (GNU extensions) +* 1. To use optimized software implementations provided by the RP2-series device's bootrom or the SDK +* 2. To use optimized combined software/hardware implementations utilizing custom RP2-series hardware for acceleration +* 3. To control the amount of C compiler/library code bloat +* 4. 
To make sure no floating point is called at all * -* The following additional optimized functions are also provided: +* The pico_float library comes in three main flavors: * -* - int2float, uint2float, int642float, uint642float, fix2float, ufix2float, fix642float, ufix642float -* - float2fix, float2ufix, float2fix64, float2ufix64, float2int, float2uint, float2int64, float2uint64, float2int_z, float2int64_z, float2uint_z, float2uint64_z -* - exp10f, sincosf, powintf +* 1. `pico_float_none` - all floating point operations cause a \ref panic - no single-precision floating point code is included +* 2. `pico_float_compiler` - no custom functions are provided; all single-precision floating point is handled by the C compiler/library +* 3. `pico_float_pico` - the smallest and fastest available for the platform, along with additional functionality (e.g. fixed point conversions) which are detailed below * -* On RP2350 (Arm) the following additional functions are available; the _fast methods are faster but do not round correctly +* The user can control which version they want (e.g. **pico_float_xxx**) by either setting the CMake global variable +* `PICO_DEFAULT_FLOAT_IMPL=xxx`, or by using the CMake function `pico_set_float_implementation(TARGET xxx)`. Note that in the absence +* of either, pico_float_pico is used by default. * -* - float2fix64_z, fdiv_fast, fsqrt_fast, +* \if rp2040_specific +* On RP2040, `pico_float_pico` uses optimized hand coded implementations from the bootrom and the SDK for both +* basic single-precision floating point operations and floating point math library functions. 
These implementations +* are generally faster and smaller than those provided by the C compiler/library, though they don't support all the features of a fully compliant +* floating point implementation; they are however usually fine for the majority of cases +* \endif * -* On RP2350 RISC-V, only a small number of compiler runtime functions are overridden with faster implementations: +* \if rp2350_specific +* On Arm on RP2350, there are multiple options for `pico_float_pico`: * -* - __addsf3, __subsf3, __mulsf3 +* 1. `pico_float_pico_vfp` - this library leaves basic C single-precision floating point operations to the compiler +* which can use inlined VFP (Arm FPU) code. Custom optimized versions of trigonometric and scientific functions are provided. +* No DCP (RP2350 Double co-processor) instructions are used. +* 2. `pico_float_pico_dcp` - this library prevents the compiler injecting inlined VFP code, and also implements +* all single-precision floating point operations in optimized DCP or M33 code. This option is not quite as fast +* as pico_float_pico_vfp, however it allows floating point operations without enabling the floating point co-processor +* on the CPU; this can be beneficial in certain circumstances, e.g. where leaving stack in tasks or interrupts +* for the floating point state is undesirable. 
+* +* Note: `pico_float_pico` is equivalent to `pico_float_pico_vfp` on RP2350, as this is the most sensible default +* \endif +* +* On Arm, (replacement) optimized implementations are provided for the following compiler built-ins +* and math library functions when using `_pico` variants of `pico_float`: +* +* - basic arithmetic: (except `pico_float_pico_vfp`) +* +* __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub +* +* - comparison: (except `pico_float_pico_vfp`) +* +* __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun +* +* - (u)int32 <-> float: (except `pico_float_pico_vfp`) +* +* __aeabi_i2f, __aeabi_ui2f, __aeabi_f2iz, __aeabi_f2uiz +* +* - (u)int64 <-> float: (except `pico_float_pico_vfp`) +* +* __aeabi_l2f, __aeabi_ul2f, __aeabi_f2lz, __aeabi_f2ulz +* +* - float -> double: (except `pico_float_pico_vfp`) +* +* __aeabi_f2d +* +* - basic trigonometric: +* +* sqrtf, cosf, sinf, tanf, atan2f, expf, logf +* +* - trigonometric and scientific +* +* ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf +* +* - GNU extensions: +* +* powintf, sincosf +* +* On Arm, the following additional optimized functions are also provided (when using `_pico` variants of `pico_float`): +* +* - Conversions to/from integer types: +* +* - (u)int -> float (round to nearest): +* +* int2float, uint2float, int642float, uint642float +* +* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they map to inline VFP code +* +* - float -> (u)int (round towards zero): +* +* float2int_z, float2uint_z, float2int64_z, float2uint64_z +* +* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they map to inline VFP code +* +* - float -> (u)int (round 
towards -infinity): +* +* float2int, float2uint, float2int64, float2uint64 +* +* - Conversions to/from fixed point integers: +* +* - (u)fix -> float (round to nearest): +* +* fix2float, ufix2float, fix642float, ufix642float +* +* - float -> (u)fix (round towards zero): +* +* float2fix_z, float2ufix_z, float2fix64_z, float2ufix64_z +* +* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they can map to inline VFP code +* when the number of fractional bits is a compile time constant between 1 and 32 +* +* - float -> (u)fix (round towards -infinity): +* +* float2fix, float2ufix, float2fix64, float2ufix64 +* +* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they can map to inline VFP code +* when the number of fractional bits is a compile time constant between 1 and 32 +* +* - Even faster versions of divide and square-root functions that do not round correctly: (`pico_float_pico_dcp` only) +* +* fdiv_fast, sqrtf_fast +* +* \if rp2350_specific +* On RISC-V, (replacement) optimized implementations are provided for the following compiler built-ins when using the `pico_float_pico` +* library (note that there are no variants of this library like there are on Arm): +* +* - basic arithmetic: +* +* __addsf3, __subsf3, __mulsf3 +* \endif */ - -// None of these functions are available on RISC-V: #if !defined(__riscv) || PICO_COMBINED_DOCS -float int2float(int32_t f); -float uint2float(uint32_t f); -float int642float(int64_t f); -float uint642float(uint64_t f); +#if PICO_COMBINED_DOCS || !LIB_PICO_FLOAT_COMPILER +float int2float(int32_t i); +float uint2float(uint32_t i); +float int642float(int64_t i); +float uint642float(uint64_t i); float fix2float(int32_t m, int e); float ufix2float(uint32_t m, int e); float fix642float(int64_t m, int e); float ufix642float(uint64_t m, int e); -// These methods round towards -Infinity. 
-int32_t float2fix(float f, int e); -uint32_t float2ufix(float f, int e); -int64_t float2fix64(float f, int e); -uint64_t float2ufix64(float f, int e); -int32_t float2int(float f); -uint32_t float2uint(float f); -int64_t float2int64(float f); -uint64_t float2uint64(float f); - -// These methods round towards 0. +// These methods round towards 0, which IS the C way int32_t float2int_z(float f); int64_t float2int64_z(float f); int32_t float2uint_z(float f); int64_t float2uint64_z(float f); +int32_t float2fix_z(float f, int e); +uint32_t float2ufix_z(float f, int e); +int64_t float2fix64_z(float f, int e); +uint64_t float2ufix64_z(float f, int e); + +// These methods round towards -Infinity - which IS NOT the C way for negative numbers; +// as such the naming is not ideal, however is kept for backwards compatibility +int32_t float2int(float f); +uint32_t float2uint(float f); +int64_t float2int64(float f); +uint64_t float2uint64(float f); +int32_t float2fix(float f, int e); +uint32_t float2ufix(float f, int e); +int64_t float2fix64(float f, int e); +uint64_t float2ufix64(float f, int e); + +#if LIB_PICO_FLOAT_PICO_VFP +// a bit of a hack to inline VFP fixed point conversion when exponent is constant and in range 1-32 +#define fix2float(m, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _fix2float_inline(m, e) : fix2 ## float(m, e), fix2 ## float(m, e)) +#define ufix2float(m, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _ufix2float_inline(m, e) : ufix2 ## float(m, e), ufix2 ## float(m, e)) +#define float2fix_z(f, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _float2fix_z_inline(f, e) : float2 ## fix_z(f, e), float2 ## fix_z(f, e)) +#define float2ufix_z(f, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? 
_float2ufix_z_inline(f, e) : float2 ## ufix_z(f, e), float2 ## ufix_z(f, e)) +#define float2fix(f, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _float2fix_inline(f, e) : float2 ## fix(f, e), float2 ## fix(f, e)) +#define float2ufix(f, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _float2ufix_inline(f, e) : float2 ## ufix(f, e), float2 ## ufix(f, e)) + +#define _fix2float_inline(m, e) ({ \ + int32_t _m = m; \ + float f; \ + pico_default_asm( \ + "vmov %0, %1\n" \ + "vcvt.f32.s32 %0, %0, %2\n" \ + : "=t" (f) \ + : "r" (_m), "i" (e) \ + ); \ + f; \ +}) +#define _ufix2float_inline(m, e) ({ \ + uint32_t _m = m; \ + float f; \ + pico_default_asm( \ + "vmov %0, %1\n" \ + "vcvt.f32.u32 %0, %0, %2\n" \ + : "=t" (f) \ + : "r" (_m), "i" (e) \ + ); \ + f; \ +}) +#define _float2fix_z_inline(f, e) ({ \ + int32_t _m; \ + float _f = (f); \ + pico_default_asm( \ + "vcvt.s32.f32 %0, %0, %2\n" \ + "vmov %1, %0\n" \ + : "+t" (_f), "=r" (_m) \ + : "i" (e) \ + ); \ + _m; \ +}) +#define _float2ufix_z_inline(f, e) ({ \ + uint32_t _m; \ + float _f = (f); \ + pico_default_asm( \ + "vcvt.u32.f32 %0, %0, %2\n" \ + "vmov %1, %0\n" \ + : "+t" (_f), "=r" (_m) \ + : "i" (e) \ + ); \ + _m; \ +}) +#define _float2fix_z_inline(f, e) ({ \ + int32_t _m; \ + float _f = (f); \ + pico_default_asm( \ + "vcvt.s32.f32 %0, %0, %2\n" \ + "vmov %1, %0\n" \ + : "+t" (_f), "=r" (_m) \ + : "i" (e) \ + ); \ + _m; \ +}) +#define _float2fix_inline(f, e) ({ \ + union { float _f; int32_t _i; } _u; \ + _u._f = (f); \ + uint rc, tmp; \ + pico_default_asm( \ + "vcvt.s32.f32 %0, %0, %4\n" \ + "vmov %2, %0\n" \ + "lsls %1, #1\n" \ + "bls 2f\n" /* positive or zero or -zero are ok with the result we have */ \ + "lsrs %3, %1, #24\n" \ + "subs %3, #0x7f - %c4\n" \ + "bcc 1f\n" /* 0 < abs(f) < 1 ^ e, so need to round down */ \ + /* mask off all but fractional bits */ \ + "lsls %1, %3\n" \ + "lsls %1, #8\n" \ + "beq 2f\n" /* integers can round towards zero */ \ + "1:\n" \ + 
/* need to subtract 1 from the result to round towards -infinity... */ \ + /* this will never cause an overflow, because to get here we must have had a non integer/infinite value which */ \ + /* therefore cannot have been equal to INT64_MIN when rounded towards zero */ \ + "subs %2, #1\n" \ + "2:\n" \ + : "+t" (_u._f), "+r" (_u._i), "=r" (rc), "=r" (tmp) \ + : "i" (e) \ + ); \ + rc; \ +}) +#define _float2ufix_inline(f, e) _float2ufix_z_inline((f), (e)) +#endif + +#if LIB_PICO_FLOAT_PICO_VFP +// may as well provide inline macros for VFP +#define int2float(i) ((float)(int32_t)(i)) +#define uint2float(i) ((float)(uint32_t)(i)) +#define float2int_z(f) ((int32_t)(f)) +#define float2uint_z(f) ((uint32_t)(f)) +#endif + +#endif float exp10f(float x); void sincosf(float x, float *sinx, float *cosx); float powintf(float x, int y); #if !PICO_RP2040 || PICO_COMBINED_DOCS -int64_t float2fix64_z(float f, int e); float fdiv_fast(float n, float d); -float fsqrt_fast(float f); +float sqrtf_fast(float f); #endif #endif +#if defined(__riscv) || LIB_PICO_FLOAT_COMPILER +// when using the compiler or RISC-V, we provide as many functions as we trivially can - these will be efficient +// when using hard-float on Arm +static inline float int2float(int32_t i) { return (float)i; } +static inline float uint2float(uint32_t i) { return (float)i; } +static inline float int642float(int64_t i) { return (float)i; } +static inline float uint642float(uint64_t i) { return (float)i; } + +static inline int32_t float2int_z(float f) { return (int32_t)f; } +static inline int64_t float2int64_z(float f) { return (int64_t)f; } +static inline int32_t float2uint_z(float f) { return (uint32_t)f; } +static inline int64_t float2uint64_z(float f) { return (uint64_t)f; } +#endif + #ifdef __cplusplus } #endif diff --git a/test/pico_float_test/BUILD.bazel b/test/pico_float_test/BUILD.bazel index 1efdf724..36405330 100644 --- a/test/pico_float_test/BUILD.bazel +++ b/test/pico_float_test/BUILD.bazel @@ -85,3 +85,12 @@ 
filegroup( name = "m33", srcs = ["m33.c"], ) + +# TODO: Add these tests to the Bazel build. +filegroup( + name = "unsupported_tests", + srcs = [ + "custom_double_funcs_test.c", + "custom_float_funcs_test.c", + ], +) \ No newline at end of file diff --git a/test/pico_float_test/CMakeLists.txt b/test/pico_float_test/CMakeLists.txt index 971c2a6d..93845bb3 100644 --- a/test/pico_float_test/CMakeLists.txt +++ b/test/pico_float_test/CMakeLists.txt @@ -79,4 +79,31 @@ else () target_link_libraries(m33 pico_double pico_stdlib) pico_add_extra_outputs(m33) endif() + endif() + +set(FLOAT_TYPES compiler) +set(DOUBLE_TYPES compiler) +list(APPEND FLOAT_TYPES pico) +list(APPEND DOUBLE_TYPES pico) +if (PICO_RP2350) + if (NOT PICO_RISCV) + list(APPEND FLOAT_TYPES pico_vfp pico_dcp) + endif() +endif() + +foreach (FLOAT_TYPE IN LISTS FLOAT_TYPES) + add_executable(custom_float_funcs_test_${FLOAT_TYPE} custom_float_funcs_test.c) + pico_set_float_implementation(custom_float_funcs_test_${FLOAT_TYPE} ${FLOAT_TYPE}) + target_link_libraries(custom_float_funcs_test_${FLOAT_TYPE} PRIVATE pico_stdlib) + pico_add_extra_outputs(custom_float_funcs_test_${FLOAT_TYPE}) + pico_set_printf_implementation(custom_float_funcs_test_${FLOAT_TYPE} compiler) +endforeach () + +foreach (DOUBLE_TYPE IN LISTS DOUBLE_TYPES) + add_executable(custom_double_funcs_test_${DOUBLE_TYPE} custom_double_funcs_test.c) + pico_set_double_implementation(custom_double_funcs_test_${DOUBLE_TYPE} ${DOUBLE_TYPE}) + target_link_libraries(custom_double_funcs_test_${DOUBLE_TYPE} PRIVATE pico_stdlib) + pico_add_extra_outputs(custom_double_funcs_test_${DOUBLE_TYPE}) + pico_set_printf_implementation(custom_double_funcs_test_${DOUBLE_TYPE} compiler) +endforeach () \ No newline at end of file diff --git a/test/pico_float_test/custom_double_funcs_test.c b/test/pico_float_test/custom_double_funcs_test.c new file mode 100644 index 00000000..85624d4c --- /dev/null +++ b/test/pico_float_test/custom_double_funcs_test.c @@ -0,0 +1,515 @@ 
+#include +#include "pico/stdlib.h" +#include "pico/double.h" +#include "math.h" + +#if 0 +#define printf(...) ((void)0) +#endif +#if 0 +#define stop() return -1 +#else +#define stop() rc=1 +#endif +#define test_assert(x) ({ if (!(x)) { printf("Assertion failed: ");puts(#x);printf(" at " __FILE__ ":%d\n", __LINE__); stop(); } }) +#define test_checkd(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %f != %f\n", msg, x, expected); stop(); } }) +#define test_checki(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %d != %d\n", msg, x, expected); stop(); } }) +#define test_checku(x, expected, msg) ({ if ((uint32_t)(x) != (uint32_t)(expected)) { printf(" %s: %u != %u\n", msg, x, expected); stop(); } }) +#define test_checki64(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %lld != %lld\n", msg, (int64_t)(x), (int64_t)(expected)); stop(); } }) +#define test_checku64(x, expected, msg) ({ if ((uint64_t)(x) != (uint64_t)(expected)) { printf(" %s: %llu != %llu\n", msg, (uint64_t)(x), (uint64_t)(expected)); stop(); } }) + +#if !(LIB_PICO_DOUBLE_COMPILER || defined(__riscv)) +static inline double fix2double_8(int32_t m) { return fix2double(m, 8); } +static inline double fix2double_12(int32_t m) { return fix2double(m, 12); } +static inline double fix2double_16(int32_t m) { return fix2double(m, 16); } +static inline double fix2double_24(int32_t m) { return fix2double(m, 24); } +static inline double fix2double_28(int32_t m) { return fix2double(m, 28); } +static inline double fix2double_32(int32_t m) { return fix2double(m, 32); } + +static inline double ufix2double_12(int32_t m) { return ufix2double(m, 12); } + +static inline double double2fix_12(int32_t m) { return double2fix(m, 12); } + +static inline double double2ufix_12(int32_t m) { return double2ufix(m, 12); } +#endif + +#if 1 && (LIB_PICO_DOUBLE_COMPILER || defined(__riscv)) +#define double2int_z(f) ({ double _d = f; pico_default_asm_volatile("" : "+r" (_d)); double2 ## int_z(_d); }) +#define 
double2uint_z(f) ({ double _d = f; pico_default_asm_volatile("" : "+r" (_d)); double2 ## uint_z(_d); }) +#define double2int64_z(f) ({ double _d = f; pico_default_asm_volatile("" : "+r" (_d)); double2 ## int64_z(_d); }) +#define double2uint64_z(f) ({ double _d = f; pico_default_asm_volatile("" : "+r" (_d)); double2 ## uint64_z(_d); }) +#define int2double(i) ({ int32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int2 ## double(_i); }) +#define uint2double(i) ({ uint32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint2 ## double(_i); }) +#define int642double(i) ({ int64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int642 ## double(_i); }) +#define uint642double(i) ({ uint64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint642 ## double(_i); }) +#endif + +int test() { + int rc = 0; +#if LIB_PICO_DOUBLE_PICO + printf(">>> Using PICO\n"); +#endif + printf("int2double\n"); + test_checkd(int2double(0), 0.0, "int2double1"); + test_checkd(int2double(-1), -1.0, "int2double2"); + test_checkd(int2double(1), 1.0, "int2double3"); + test_checkd(int2double(INT32_MAX), 2147483647.0, "int2double4"); + test_checkd(int2double(INT32_MIN), -2147483648.0, "int2double5"); + // these have rounding behavior on float but not double + test_checkd(int2double(2147483391), 2147483391.0, "int2double6"); + test_checkd(int2double(2147483456), 2147483456.0, "int2double7"); // was a duplicate of int2double6 + test_checkd(int2double(2147483457), 2147483457.0, "int2double8"); + test_checkd(int2double(2147483483), 2147483483.0, "int2double9"); + test_checkd(int2double(2147483584), 2147483584.0, "int2double10"); + + printf("uint2double\n"); + test_checkd(uint2double(0), 0.0, "uint2double1"); + test_checkd(uint2double(1), 1.0, "uint2double2"); + test_checkd(uint2double(INT32_MAX), 2147483647.0, "uint2double3"); + // todo test correct rounding around maximum precision + test_checkd(uint2double(UINT32_MAX), 4294967295.0, "uint2double4"); + + printf("int642double\n"); + test_checkd(int642double(0), 0.0,
"int642double1"); + test_checkd(int642double(-1), -1.0, "int642double2"); + test_checkd(int642double(1), 1.0, "int642double3"); + test_checkd(int642double(INT32_MAX-1), 2147483646.0, "int642double4"); + test_checkd(int642double(INT32_MAX), 2147483647.0, "int642double5"); + test_checkd(int642double(INT32_MAX+1ll), 2147483648.0, "int642double6"); + test_checkd(int642double(INT32_MIN-1ll), -2147483649.0, "int642double7"); + test_checkd(int642double(INT32_MIN), -2147483648.0, "int642double8"); + test_checkd(int642double(INT32_MIN+1ll), -2147483647.0, "int642double9"); + // todo test correct rounding around maximum precision + test_checkd(int642double(INT64_MAX), 9223372036854775807.0, "int642double10"); // literal is not exactly representable as a double + test_checkd(int642double(INT64_MIN), -9223372036854775808.0, "int642double11"); + + printf("uint642double\n"); + test_checkd(uint642double(0), 0.0, "uint642double1"); + test_checkd(uint642double(1), 1.0, "uint642double2"); + test_checkd(uint642double(INT32_MAX-1), 2147483646.0, "uint642double3"); + test_checkd(uint642double(INT32_MAX), 2147483647.0, "uint642double4"); + test_checkd(uint642double(INT32_MAX+1ll), 2147483648.0, "uint642double5"); + test_checkd(uint642double(INT64_MAX), 9223372036854775807.0, "uint642double6"); + // todo test correct rounding around maximum precision + test_checkd(uint642double(UINT64_MAX), 18446744073709551615.0, "uint642double7"); + + union { + uint64_t u; + double d; + } u64d; + +#if !(LIB_PICO_DOUBLE_COMPILER || defined(__riscv)) + printf("fix2double\n"); + // todo test correct rounding around maximum precision + test_checkd(fix2double(-3, 1), -1.5, "fix2double1"); + test_checkd(fix2double(-3, 1), -1.5, "fix2double2"); + test_checkd(fix2double(-3, -4), -48.0, "fix2double3"); + + printf("ufix2double\n"); + // todo test correct rounding around maximum precision + test_checkd(ufix2double(0xa0000000, 30), 2.5, "ufix2double1"); + test_checkd(ufix2double(3, -4), 48.0, "ufix2double2"); + + printf("fix642double\n"); + // todo test correct
rounding around maximum precision + test_checkd(fix642double(-0xa000000000ll, 38), -2.5, "fix642double1"); + test_checkd(fix642double(-3, -34), -51539607552.0, "fix642double2"); + + printf("ufix642double\n"); + // todo test correct rounding around maximum precision + test_checkd(ufix642double(0xa000000000ll, 38), 2.5, "ufix642double1"); + test_checkd(ufix642double(3, -34), 51539607552.0, "fix64double2"); + + test_checkd(fix2double_8(128), 0.5, "fix2double_8_1"); + test_checkd(fix2double_8(-128), -0.5, "fix2double_8_2"); + test_checkd(fix2double_16(8192), 0.125, "fix2double_8_3"); + test_checkd(fix2double_16(-8192), -0.125, "fix2double_8_4"); + test_checkd(fix2double_24(3<<23), 1.5, "fix2double_8_5"); + test_checkd(fix2double_24(-(3<<23)), -1.5, "fix2double_8_6"); + + printf("double2fix\n"); + test_checki(double2fix(-0.5, 8), -0x80, "double2fix0"); + test_checki(double2fix(3.5, 8), 0x380, "double2fix1"); + test_checki(double2fix(-3.5, 8), -0x380, "double2fix2"); + test_checki(double2fix(32768.0, 16), INT32_MAX, "double2fix3"); + test_checki(double2fix(65536.0, 16), INT32_MAX, "double2fix4"); + test_checki(double2fix(-65536.0, 16), INT32_MIN, "double2fix4b"); + test_checki(double2fix(INFINITY, 16), INT32_MAX, "double2fix5"); + test_checki(double2fix(-INFINITY, 16), INT32_MIN, "double2fix5b"); + test_checki(double2fix(INFINITY, -16), INT32_MAX, "double2fix5c"); + test_checki(double2fix(-INFINITY, -16), INT32_MIN, "double2fix5d"); + test_checki(double2fix(3.24999, 2), 12, "double2fix6"); + test_checki(double2fix(3.25, 2), 13, "double2fix7"); + test_checki(double2fix(-3.24999, 2), -13, "double2fix8"); + test_checki(double2fix(-3.25, 2), -13, "double2fix9"); + test_checki(double2fix(-0.75, 1), -2, "double2fix10"); + test_checki(double2fix(-3.0, -1), -2, "double2fix11"); // not very useful + test_checki(double2fix(0.0, 16), 0, "double2fix12"); + test_checki(double2fix(-0.0, 16), 0, "double2fix13"); + test_checki(double2fix(0.0, -16), 0, "double2fix14"); + 
test_checki(double2fix(-0.0, -16), 0, "double2fix15"); + + printf("double2ufix\n"); + test_checku(double2ufix(3.5, 8), 0x380, "double2ufix1"); + test_checku(double2ufix(-3.5, 8), 0, "double2ufix2"); + test_checku(double2ufix(32768.0, 16), 32768u << 16, "double2ufix3"); // unsigned literal: 32768 << 16 overflows a 32-bit int + test_checku(double2ufix(65536.0, 16), UINT32_MAX, "double2ufix4"); + test_checku(double2ufix(INFINITY, 16), UINT32_MAX, "double2ufix5"); + test_checku(double2ufix(-INFINITY, 16), 0, "double2ufix5b"); + test_checku(double2ufix(INFINITY, -16), UINT32_MAX, "double2ufix5c"); + test_checku(double2ufix(-INFINITY, -16), 0, "double2ufix5d"); + test_checku(double2ufix(3.24999, 2), 12, "double2ufix6"); + test_checku(double2ufix(3.25, 2), 13, "double2ufix7"); + test_checku(double2ufix(3.0, -1), 1, "double2ufix8"); // not very useful + test_checki(double2ufix(0.0, 16), 0, "double2ufix12"); + test_checki(double2ufix(-0.0, 16), 0, "double2ufix13"); + test_checki(double2ufix(0.0, -16), 0, "double2ufix14"); + test_checki(double2ufix(-0.0, -16), 0, "double2ufix15"); + + printf("double2fix64\n"); + test_checki64(double2fix64(3.5, 8), 0x380, "double2fix641"); + test_checki64(double2fix64(-3.5, 8), -0x380, "double2fix642"); + test_checki64(double2fix64(32768.0, 16), 32768ll << 16, "double2fix643"); + test_checki64(double2fix64(65536.0, 16), 65536ll << 16, "double2fix644"); + test_checki64(double2fix64(2147483648.0, 16), 2147483648ll << 16, "double2fix644b"); + test_checki64(double2fix64(65536.0 * 65536.0 * 32768.0, 16), INT64_MAX, "double2fix644c"); + test_checki64(double2fix64(INFINITY, 16), INT64_MAX, "double2fix645"); + test_checki64(double2fix64(-INFINITY, 16), INT64_MIN, "double2fix645b"); + test_checki64(double2fix64(INFINITY, -16), INT64_MAX, "double2fix645c"); + test_checki64(double2fix64(-INFINITY, -16), INT64_MIN, "double2fix645d"); + test_checki64(double2fix64(3.24999, 2), 12, "double2fix646"); + test_checki64(double2fix64(3.25, 2), 13, "double2fix647"); + test_checki64(double2fix64(-3.24999, 2), -13,
"double2fix648"); + test_checki64(double2fix64(-3.25, 2), -13, "double2fix649"); + test_checki64(double2fix64(-3.0, -1), -2, "double2fix6410"); // not very useful + test_checki64(double2fix64(2147483648.0 * 2147483648.0, 16), INT64_MAX, "double2ufix6411"); + test_checki64(double2fix64(0.0, 16), 0, "double2fix6412"); + test_checki64(double2fix64(-0.0, 16), 0, "double2fix6413"); + test_checki64(double2fix64(0.0, -16), 0, "double2fix6412b"); + test_checki64(double2fix64(-0.0, -16), 0, "double2fix6413b"); + test_checki64(double2fix64(-3.25, 40), -13ll * (1ll << 38), "double2fix6414"); + u64d.u = 0xc00a000000000001; + test_checki64(double2fix64(u64d.d, 40), -13ll * (1ll << 38) - 1ll, "double2fix6414b"); + + u64d.u = 0xc00a000080000001; + test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18) - 2ll, "double2fix6415c"); + u64d.u = 0xc00a000080000000; + test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix6415d"); + u64d.u = 0xc00a000000000001; + test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix6415e"); + u64d.u = 0xc00a000000000000; + test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18), "double2fix6415g"); + + u64d.u = 0xc00a000080000001; + test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17) - 1ll, "double2fix6415h"); + u64d.u = 0xc00a000080000000; + test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17) - 1ll, "double2fix6415i"); + u64d.u = 0xc00a000000000001; + test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17) - 1ll, "double2fix6415j"); + u64d.u = 0xc00a000000000000; + test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17), "double2fix6415k"); + + printf("double2ufix64\n"); + test_checku64(double2ufix64(3.5, 8), 0x380, "double2ufix641"); + test_checku64(double2ufix64(-3.5, 8), 0, "double2ufix642"); + test_checku64(double2ufix64(32768.0, 16), 32768ull << 16, "double2ufix643"); + test_checku64(double2ufix64(65536.0, 16), 65536ull << 16, "double2ufix644"); + 
test_checku64(double2ufix64(2147483648.0, 16), 2147483648ull << 16, "double2ufix644b"); + test_checku64(double2ufix64(INFINITY, 16), UINT64_MAX, "double2ufix645"); + test_checku64(double2ufix64(-INFINITY, 16), 0, "double2ufix645b"); + test_checku64(double2ufix64(INFINITY, -16), UINT64_MAX, "double2ufix645c"); + test_checku64(double2ufix64(-INFINITY, -16), 0, "double2ufix645d"); + test_checku64(double2ufix64(3.24999, 2), 12, "double2ufix646"); + test_checku64(double2ufix64(3.25, 2), 13, "double2ufix647"); + test_checku64(double2ufix64(3.0, -1), 1, "double2ufix648"); // not very useful + test_checki64(double2ufix64(0.0, 16), 0, "double2ufix649"); + test_checki64(double2ufix64(-0.0, 16), 0, "double2ufix6410"); + + printf("double2fix_z\n"); + test_checki(double2fix_z(3.5, 8), 0x380, "double2fix_z1"); + test_checki(double2fix_z(-3.5, 8), -0x380, "double2fix_z2"); + test_checki(double2fix_z(32768.0, 16), INT32_MAX, "double2fix_z3"); + test_checki(double2fix_z(65536.0, 16), INT32_MAX, "double2fix_z4"); + test_checki(double2fix_z(INFINITY, 16), INT32_MAX, "double2fix_z5"); + test_checki(double2fix_z(-INFINITY, 16), INT32_MIN, "double2fix_z5b"); + test_checki(double2fix_z(INFINITY, -50), INT32_MAX, "double2fix_z5c"); + test_checki(double2fix_z(-INFINITY, -50), INT32_MIN, "double2fix_z5d"); + test_checki(double2fix_z(3.24999, 2), 12, "double2fix_z6"); + test_checki(double2fix_z(3.25, 2), 13, "double2fix_z7"); + test_checki(double2fix_z(-3.24999, 2), -12, "double2fix_z8"); + test_checki(double2fix_z(-3.25, 2), -13, "double2fix_z9"); + test_checki(double2fix_z(-0.75, 1), -1, "double2fix_z10"); + test_checki(double2fix_z(-3.0, -1), -1, "double2fix_z11"); // not very useful + test_checki(double2fix_z(0.0, 16), 0, "double2fix_z12"); + test_checki(double2fix_z(-0.0, 16), 0, "double2fix_z13"); + test_checki(double2fix_z(0.0, -16), 0, "double2fix_z12b"); + test_checki(double2fix_z(-0.0, -16), 0, "double2fix_z13b"); + + printf("double2ufix_z\n"); + test_checku(double2ufix_z(3.5, 8), 
0x380, "double2ufix_z1"); + test_checku(double2ufix_z(-3.5, 8), 0, "double2ufix_z2"); + test_checku(double2ufix_z(32768.0, 16), 32768u << 16, "double2ufix_z3"); // unsigned literal: 32768 << 16 overflows a 32-bit int + test_checku(double2ufix_z(65536.0, 16), UINT32_MAX, "double2ufix_z4"); + test_checku(double2ufix_z(INFINITY, 16), UINT32_MAX, "double2ufix_z5"); + test_checku(double2ufix_z(-INFINITY, 16), 0, "double2ufix_z5b"); + test_checku(double2ufix_z(INFINITY, 16), UINT32_MAX, "double2ufix_z5c"); + test_checku(double2ufix_z(-INFINITY, 16), 0, "double2ufix_z5d"); + test_checku(double2ufix_z(3.24999, 2), 12, "double2ufix_z6"); + test_checku(double2ufix_z(3.25, 2), 13, "double2ufix_z7"); + test_checku(double2ufix_z(3.0, -1), 1, "double2ufix_z8"); // not very useful + test_checki(double2ufix_z(0.0, 16), 0, "double2ufix_z9"); + test_checki(double2ufix_z(-0.0, 16), 0, "double2ufix_z10"); + test_checki(double2ufix_z(0.0, -16), 0, "double2ufix_z11"); + test_checki(double2ufix_z(-0.0, -16), 0, "double2ufix_z12"); + + printf("double2fix64_z\n"); + test_checki64(double2fix64_z(3.5, 8), 0x380, "double2fix64_z1"); + test_checki64(double2fix64_z(-3.5, 8), -0x380, "double2fix64_z2"); + test_checki64(double2fix64_z(32768.0, 16), 32768ll << 16, "double2fix64_z3"); + test_checki64(double2fix64_z(65536.0, 16), 65536ll << 16, "double2fix64_z4"); + test_checki64(double2fix64_z(65536.0 * 65536.0 * 32768.0, 16), INT64_MAX, "double2fix64_z4b"); + test_checki64(double2fix64_z(INFINITY, 16), INT64_MAX, "double2fix64_z5"); + test_checki64(double2fix64_z(-INFINITY, 16), INT64_MIN, "double2fix64_z5b"); + test_checki64(double2fix64_z(INFINITY, 16), INT64_MAX, "double2fix64_z5c"); + test_checki64(double2fix64_z(-INFINITY, 16), INT64_MIN, "double2fix64_z5d"); + test_checki64(double2fix64_z(3.24999, 2), 12, "double2fix64_z6"); + test_checki64(double2fix64_z(3.25, 2), 13, "double2fix64_z7"); + test_checki64(double2fix64_z(-3.24999, 2), -12, "double2fix64_z8"); + test_checki64(double2fix64_z(-3.25, 2), -13, "double2fix64_z9"); +
test_checki64(double2fix64_z(-3.0, -1), -1, "double2fix64_z10"); // not very useful + test_checki64(double2fix64_z(0.0, 16), 0, "double2fix64_z11"); + test_checki64(double2fix64_z(-0.0, 16), 0, "double2fix64_z12"); + test_checki64(double2fix64_z(0.0, -16), 0, "double2fix64_z13"); + test_checki64(double2fix64_z(-0.0, -16), 0, "double2fix64_z14"); + test_checki64(double2fix64_z(-3.25, 40), -13ll * (1ll << 38), "double2fix64_z15"); + u64d.u = 0xc00a000000000001; + test_checki64(double2fix64_z(u64d.d, 40), -13ll * (1ll << 38), "double2fix64_z15b"); + + u64d.u = 0xc00a000080000001; + test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix64_z15c"); + u64d.u = 0xc00a000080000000; + test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix64_z15d"); + u64d.u = 0xc00a000000000001; + test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18), "double2fix64_z15e"); + u64d.u = 0xc00a000000000000; + test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18), "double2fix64_z15g"); + + u64d.u = 0xc00a000080000001; + test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15h"); + u64d.u = 0xc00a000080000000; + test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15i"); + u64d.u = 0xc00a000000000001; + test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15j"); + u64d.u = 0xc00a000000000000; + test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15k"); + + printf("double2ufix64_z\n"); + test_checku64(double2ufix64_z(3.5, 8), 0x380, "double2ufix64_z1"); + test_checku64(double2ufix64_z(-3.5, 8), 0, "double2ufix64_z2"); + test_checku64(double2ufix64_z(32768.0, 16), 32768ll << 16, "double2ufix64_z3"); + test_checku64(double2ufix64_z(65536.0, 16), 65536ll << 16, "double2ufix64_z4"); + test_checki64(double2ufix64_z(65536.0 * 65536.0 * 65536.0, 16), UINT64_MAX, "double2fix64_z4b"); + test_checku64(double2ufix64_z(INFINITY, 16), 
UINT64_MAX, "double2ufix64_z5"); + test_checku64(double2ufix64_z(-INFINITY, 16), 0, "double2ufix64_z5b"); + test_checku64(double2ufix64_z(INFINITY, 16), UINT64_MAX, "double2ufix64_z5c"); + test_checku64(double2ufix64_z(-INFINITY, 16), 0, "double2ufix64_z5d"); + test_checku64(double2ufix64_z(3.24999, 2), 12, "double2ufix64_z6"); + test_checku64(double2ufix64_z(3.25, 2), 13, "double2ufix64_z7"); + test_checki64(double2ufix64_z(3.0, -1), 1, "double2fuix64_z8"); // not very useful + test_checki64(double2ufix64_z(0.0, 16), 0, "double2ufix64_z9"); + test_checki64(double2ufix64_z(-0.0, 16), 0, "double2ufix64_z10"); + test_checki64(double2ufix64_z(0.0, -16), 0, "double2ufix64_z11"); + test_checki64(double2ufix64_z(-0.0, -16), 0, "double2ufix64_z12"); + + printf("double2int\n"); + test_checki(double2int(0.0), 0, "double2int1"); + test_checki(double2int(0.25), 0, "double2int1b"); + test_checki(double2int(0.5), 0, "double2int2"); + test_checki(double2int(0.75), 0, "double2int2b"); + test_checki(double2int(1.0), 1, "double2int3"); + test_checki(double2int(-10.0), -10, "double2int3a"); + test_checki(double2int(-0.0), 0, "double2int3b"); + test_checki(double2int(-0.25), -1, "double2int4"); + test_checki(double2int(-0.5), -1, "double2int4b"); + test_checki(double2int(-0.75), -1, "double2int5"); + test_checki(double2int(-1.0), -1, "double2int5b"); + // todo test correct rounding around maximum precision + test_checki(double2int(2147483646.0), INT32_MAX-1, "double2int6"); + test_checki(double2int(2147483647.0), INT32_MAX, "double2int6b"); + test_checki(double2int(21474836470.0), INT32_MAX, "double2int7"); + test_checki(double2int(-2147483648.0), INT32_MIN, "double2int8"); + test_checki(double2int(-21474836480.0), INT32_MIN, "double2int9"); + test_checki(double2int(-2.5), -3, "double2int10"); + test_checki(double2int(-2.4), -3, "double2int11"); + u64d.u = 0xc000000000000000ull; + test_checki(double2int(u64d.d), -2, "double2int12"); + u64d.u = 0xc008000000000000ull; + 
test_checki(double2int(u64d.d), -3, "double2int12b"); + u64d.u = 0xc000000000000001ull; + test_checki(double2int(u64d.d), -3, "double2int12c"); + u64d.u = 0xc000000080000000ull; + test_checki(double2int(u64d.d), -3, "double2int12d"); + u64d.u = 0xc000000100000000ull; + test_checki(double2int(u64d.d), -3, "double2int12e"); + u64d.u = 0xc000000100000001ull; + test_checki(double2int(u64d.d), -3, "double2int12f"); + test_checki(double2int(-2147483647.0), INT32_MIN+1, "double2int13"); + test_checki(double2int(-2147483647.1), INT32_MIN, "double2int14"); + test_checki(double2int(-2147483647.9), INT32_MIN, "double2int15"); + test_checki(double2int(-2147483648.0), INT32_MIN, "double2int16"); + test_checki(double2int(-2147483648.1), INT32_MIN, "double2int17"); + test_checki(double2int(-21474836480.1), INT32_MIN, "double2int18"); + + printf("double2uint\n"); + test_checku(double2uint(0.0), 0, "double2uint1"); + test_checku(double2uint(0.25), 0, "double2uint2"); + test_checku(double2uint(0.5), 0, "double2uint3"); + test_checku(double2uint(0.75), 0, "double2uint4"); + test_checku(double2uint(1.0), 1, "double2uint5"); + test_checku(double2uint(2147483647.0), INT32_MAX, "double2uint6"); + test_checku(double2uint(2147483648.0), INT32_MAX+1u, "double2uint7"); + test_checku(double2uint(4294967294.5), UINT32_MAX-1, "double2uint8"); + test_checku(double2uint(4294967295.0), UINT32_MAX, "double2uint9"); + test_checku(double2uint(42949672950.0), UINT32_MAX, "double2uint10"); + + printf("double2int64\n"); + test_checki64(double2int64(0.0), 0, "double2int641"); + test_checki64(double2int64(0.25), 0, "double2int641b"); + test_checki64(double2int64(0.5), 0, "double2int642"); + test_checki64(double2int64(0.75), 0, "double2int642b"); + test_checki64(double2int64(1.0), 1, "double2int643"); + test_checki64(double2int64(-10.0), -10, "double2int643a"); + test_checki64(double2int64(-0.0), 0, "double2int643b"); + test_checki64(double2int64(-0.25), -1, "double2int644"); + 
test_checki64(double2int64(-0.5), -1, "double2int644b"); + test_checki64(double2int64(-0.75), -1, "double2int645"); + test_checki64(double2int64(-1.0), -1, "double2int645b"); + // todo test correct rounding around maximum precision + test_checki64(double2int64(2147483647.0), INT32_MAX, "double2int646"); + test_checki64(double2int64(21474836470.0), 21474836470ll, "double2int647"); + test_checki64(double2int64(-2147483648.0), INT32_MIN, "double2int648"); + test_checki64(double2int64(-21474836480.0), -21474836480ll, "double2int649"); + test_checki64(double2int64(-2.5), -3, "double2int6410"); + test_checki64(double2int64(-2.4), -3, "double2int6411"); + u64d.u = 0xc000000000000000ull; + test_checki64(double2int64(u64d.d), -2, "double2int6412"); + u64d.u = 0xc008000000000000ull; + test_checki64(double2int64(u64d.d), -3, "double2int6412b"); + u64d.u = 0xc000000000000001ull; + test_checki64(double2int64(u64d.d), -3, "double2int6412c"); + u64d.u = 0xc000000080000000ull; + test_checki64(double2int64(u64d.d), -3, "double2int6412d"); + u64d.u = 0xc000000100000000ull; + test_checki64(double2int64(u64d.d), -3, "double2int6412e"); + u64d.u = 0xc000000100000001ull; + test_checki64(double2int64(u64d.d), -3, "double2int6412f"); + + printf("double2uint64\n"); + test_checku64(double2uint64(0.0), 0, "double2uint641"); + test_checku64(double2uint64(0.25), 0, "double2uint642"); + test_checku64(double2uint64(0.5), 0, "double2uint643"); + test_checku64(double2uint64(0.75), 0, "double2uint644"); + test_checku64(double2uint64(1.0), 1, "double2uint645"); + test_checku64(double2uint64(2147483647.0), INT32_MAX, "double2uint646"); + test_checku64(double2uint64(2147483648.0), INT32_MAX+1u, "double2uint647"); + // todo test correct rounding around maximum precision + test_checku64(double2uint64(4294967294.5), 4294967294ull, "double2uint648"); + test_checku64(double2uint64(4294967295.0), 4294967295ull, "double2uint649"); + test_checku64(double2uint64(42949672950.0), 42949672950, "double2uint6410"); 
+#endif + + // // These methods round towards 0. + printf("double2int_z\n"); + test_checki(double2int_z(0.0), 0, "double2int_z1"); + test_checki(double2int_z(0.25), 0, "double2int_z1b"); + test_checki(double2int_z(0.5), 0, "double2int_z2"); + test_checki(double2int_z(0.75), 0, "double2int_z2b"); + test_checki(double2int_z(1.0), 1, "double2int_z3"); + test_checki(double2int_z(-10.0), -10, "double2int_z3a"); + test_checki(double2int_z(-0.0), 0, "double2int_z3b"); + test_checki(double2int_z(-0.25), 0, "double2int_z4"); + test_checki(double2int_z(-0.5), 0, "double2int_z4b"); + test_checki(double2int_z(-0.75), 0, "double2int_z5"); + test_checki(double2int_z(-1.0), -1, "double2int_z5b"); + // todo test correct rounding around maximum precision + test_checki(double2int_z(2147483647.0), INT32_MAX, "double2int_z6"); + test_checki(double2int_z(21474836470.0), INT32_MAX, "double2int_z7"); + test_checki(double2int_z(-2147483648.0), INT32_MIN, "double2int_z8"); + test_checki(double2int_z(-21474836480.0), INT32_MIN, "double2int_z9"); + test_checki(double2int_z(-2.5), -2, "double2int_z10"); + test_checki(double2int_z(-2.4), -2, "double2int_z11"); + u64d.u = 0xc000000000000000ull; + test_checki(double2int_z(u64d.d), -2, "double2int_z12"); + u64d.u = 0xc008000000000000ull; + test_checki(double2int_z(u64d.d), -3, "double2int_z12b"); + u64d.u = 0xc000000000000001ull; + test_checki(double2int_z(u64d.d), -2, "double2int_z12c"); + u64d.u = 0xc000000080000000ull; + test_checki(double2int_z(u64d.d), -2, "double2int_z12d"); + u64d.u = 0xc000000100000000ull; + test_checki(double2int_z(u64d.d), -2, "double2int_z12e"); + u64d.u = 0xc000000100000001ull; + test_checki(double2int_z(u64d.d), -2, "double2int_z12f"); + + printf("double2int64_z\n"); + test_checki64(double2int64_z(0.0), 0, "double2int64_z1"); + test_checki64(double2int64_z(0.25), 0, "double2int64_z1b"); + test_checki64(double2int64_z(0.5), 0, "double2int64_z2"); + test_checki64(double2int64_z(0.75), 0, "double2int64_z2b"); + 
test_checki64(double2int64_z(1.0), 1, "double2int64_z3"); + test_checki64(double2int64_z(-10.0), -10, "double2int64_z3a"); + test_checki64(double2int64_z(-0.0), 0, "double2int64_z3b"); + test_checki64(double2int64_z(-0.25), 0, "double2int64_z4"); + test_checki64(double2int64_z(-0.5), 0, "double2int64_z4b"); + test_checki64(double2int64_z(-0.75), 0, "double2int64_z5"); + test_checki64(double2int64_z(-1.0), -1, "double2int64_z5b"); + // todo test correct rounding around maximum precision + test_checki64(double2int64_z(2147483647.0), 2147483647ll, "double2int64_z6"); + test_checki64(double2int64_z(21474836470.0), 21474836470ll, "double2int64_z7"); + test_checki64(double2int64_z(-2147483648.0), INT32_MIN, "double2int64_z8"); + test_checki64(double2int64_z(-21474836480.0), -21474836480ll, "double2int64_z9"); + test_checki64(double2int64_z(-2.5), -2, "double2int64_z10"); + test_checki64(double2int64_z(-2.4), -2, "double2int64_z11"); + + printf("double2uint_z\n"); + test_checku(double2uint_z(0.0), 0, "double2uint_z1"); + test_checku(double2uint_z(0.25), 0, "double2uint_z2"); + test_checku(double2uint_z(0.5), 0, "double2uint_z3"); + test_checku(double2uint_z(0.75), 0, "double2uint_z4"); + test_checku(double2uint_z(1.0), 1, "double2uint_z5"); + test_checku(double2uint_z(2147483647.0), INT32_MAX, "double2uint_z6"); + test_checku(double2uint_z(2147483648.0), INT32_MAX+1u, "double2uint_z7"); + // todo test correct rounding around maximum precision + test_checku(double2uint_z(4294967294.5), UINT32_MAX-1u, "double2uint_z8"); + test_checku(double2uint_z(4294967295.0), UINT32_MAX, "double2uint_z9"); + test_checku(double2uint_z(42949672950.0), UINT32_MAX, "double2uint_z10"); + + printf("double2uint64_z\n"); + test_checku64(double2uint64_z(0.0), 0, "double2uint64_z1"); + test_checku64(double2uint64_z(0.25), 0, "double2uint64_z2"); + test_checku64(double2uint64_z(0.5), 0, "double2uint64_z3"); + test_checku64(double2uint64_z(0.75), 0, "double2uint64_z4"); + 
test_checku64(double2uint64_z(1.0), 1, "double2uint64_z5"); + test_checku64(double2uint64_z(2147483647.0), INT32_MAX, "double2uint64_z6"); + test_checku64(double2uint64_z(2147483648.0), INT32_MAX+1u, "double2uint64_z7"); + // todo test correct rounding around maximum precision + test_checku64(double2uint64_z(4294967294.5), 4294967294ull, "double2uint64_z8"); + test_checku64(double2uint64_z(4294967295.0), 4294967295ull, "double2uint64_z9"); + test_checku64(double2uint64_z(4294967296.0), 4294967296ull, "double2uint64_z9b"); + test_checku64(double2uint64_z(42949672950.0), 42949672950ull, "double2uint64_z10"); + + // double exp10(double x); + // void sincos(double x, double *sinx, double *cosx); + // double powint(double x, int y); + return rc; +} + +// Entry point: calls stdio_init_all(), runs test() once, then prints +// PASSED or FAILED. The result of test() is also returned from main so it +// is no longer dropped after printing. +int main() { + stdio_init_all(); + int rc = test(); + printf("%s\n", rc ? "FAILED" : "PASSED"); + return rc; +} diff --git a/test/pico_float_test/custom_float_funcs_test.c b/test/pico_float_test/custom_float_funcs_test.c new file mode 100644 index 00000000..d749f778 --- /dev/null +++ b/test/pico_float_test/custom_float_funcs_test.c @@ -0,0 +1,402 @@ +#include <stdio.h> +#include "pico/stdlib.h" +#include "pico/float.h" +#include "math.h" + +#if 0 +#define printf(...)
((void)0) +#endif +#if 0 // set to 1 to return from test() at the first failed check +#define stop() return -1 +#else +#define stop() rc=1 +#endif +#define test_assert(x) ({ if (!(x)) { printf("Assertion failed: ");puts(#x);printf(" at " __FILE__ ":%d\n", __LINE__); stop(); } }) +#define test_checkf(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %f != %f\n", msg, x, expected); stop(); } }) +#define test_checki(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %d != %d\n", msg, x, expected); stop(); } }) +#define test_checku(x, expected, msg) ({ if ((uint32_t)(x) != (uint32_t)(expected)) { printf(" %s: %u != %u\n", msg, x, expected); stop(); } }) +#define test_checki64(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %lld != %lld\n", msg, (int64_t)(x), (int64_t)(expected)); stop(); } }) +#define test_checku64(x, expected, msg) ({ if ((uint64_t)(x) != (uint64_t)(expected)) { printf(" %s: %llu != %llu\n", msg, (uint64_t)(x), (uint64_t)(expected)); stop(); } }) + +#if !(LIB_PICO_FLOAT_COMPILER || defined(__riscv)) +static inline float fix2float_8(int32_t m) { return fix2float(m, 8); } +static inline float fix2float_12(int32_t m) { return fix2float(m, 12); } +static inline float fix2float_16(int32_t m) { return fix2float(m, 16); } +static inline float fix2float_24(int32_t m) { return fix2float(m, 24); } +static inline float fix2float_28(int32_t m) { return fix2float(m, 28); } +static inline float fix2float_32(int32_t m) { return fix2float(m, 32); } + +static inline float ufix2float_12(uint32_t m) { return ufix2float(m, 12); } // unsigned fixed-point in, float out + +static inline int32_t float2fix_12(float m) { return float2fix(m, 12); } // float in, signed fixed-point out + +static inline uint32_t float2ufix_12(float m) { return float2ufix(m, 12); } // float in, unsigned fixed-point out +#endif + +#if 1 && (LIB_PICO_FLOAT_COMPILER || defined(__riscv)) +#if __SOFTFP__ || defined(__riscv) +#define FREG "+r" +#else +#define FREG "+t" +#endif +// prevent the compiler from eliding the calculations +#define float2int_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## int_z(_f); }) +#define
float2uint_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## uint_z(_f); }) +#define float2int64_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## int64_z(_f); }) +#define float2uint64_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## uint64_z(_f); }) +#define int2float(i) ({ int32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int2 ## float(_i); }) +#define uint2float(i) ({ uint32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint2 ## float(_i); }) +#define int642float(i) ({ int64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int642 ## float(_i); }) +#define uint642float(i) ({ uint64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint642 ## float(_i); }) +#endif + +#if 1 && LIB_PICO_FLOAT_VFP +// prevent the compiler from eliding the calculations +#undef float2int_z +#undef float2uint_z +#undef int2float +#undef uint2float +#endif + +int test() { + int rc = 0; +#if LIB_PICO_FLOAT_PICO_DCP + printf(">>> Using DCP\n"); +#endif +#if LIB_PICO_FLOAT_PICO_VFP + printf(">>> Using VFP\n"); +#endif + printf("int2float\n"); + test_checkf(int2float(0), 0.0f, "int2float1"); + test_checkf(int2float(-1), -1.0f, "int2float2"); + test_checkf(int2float(1), 1.0f, "int2float3"); + test_checkf(int2float(INT32_MAX), 2147483647.0f, "int2float4"); + test_checkf(int2float(INT32_MIN), -2147483648.0f, "int2float5"); + // check rounding + test_checkf(int2float(2147483391), 2147483392.0f, "int2float6"); + test_checkf(int2float(2147483456), 2147483392.0f, "int2float7"); + test_checkf(int2float(2147483457), 2147483520.0f, "int2float8"); + test_checkf(int2float(2147483483), 2147483520.0f, "int2float9"); + test_checkf(int2float(2147483584), 2147483648.0f, "int2float10"); + + printf("uint2float\n"); + test_checkf(uint2float(0), 0.0f, "uint2float1"); + test_checkf(uint2float(1), 1.0f, "uint2float2"); + test_checkf(uint2float(INT32_MAX), 2147483647.0f, "uint2float3"); + // todo test correct rounding
around maximum precision + test_checkf(uint2float(UINT32_MAX), 4294967295.0f, "uint2float4"); + + printf("int642float\n"); + test_checkf(int642float(0), 0.0f, "int642float1"); + test_checkf(int642float(-1), -1.0f, "int642float2"); + test_checkf(int642float(1), 1.0f, "int642float3"); + test_checkf(int642float(INT32_MAX-1), 2147483646.0f, "int642float4"); // note equality is within 1ulp + test_checkf(int642float(INT32_MAX), 2147483647.0f, "int642float5"); // note equality is within 1ulp + test_checkf(int642float(INT32_MAX+1ll), 2147483648.0f, "int642float6"); + test_checkf(int642float(INT32_MIN-1ll), -2147483649.0f, "int642float7"); // note equality is within 1ulp + test_checkf(int642float(INT32_MIN), -2147483648.0f, "int642float8"); + test_checkf(int642float(INT32_MIN+1ll), -2147483647.0f, "int642float9"); // note equality is within 1ulp + // todo test correct rounding around maximum precision + test_checkf(int642float(INT64_MAX), 9223372036854775807.0f, "int642float10"); + test_checkf(int642float(INT64_MIN), -9223372036854775808.0f, "int642float11"); + + printf("uint642float\n"); + test_checkf(uint642float(0), 0.0f, "uint642float1"); + test_checkf(uint642float(1), 1.0f, "uint642float2"); + test_checkf(uint642float(INT32_MAX-1), 2147483646.0f, "uint642float3"); // note equality is within 1ulp + test_checkf(uint642float(INT32_MAX), 2147483647.0f, "uint642float4"); // note equality is within 1ulp + test_checkf(uint642float(INT32_MAX+1ll), 2147483648.0f, "uint642float5"); + test_checkf(uint642float(INT64_MAX), 9223372036854775807.0f, "uint642float6"); + // todo test correct rounding around maximum precision + test_checkf(uint642float(UINT64_MAX), 18446744073709551615.0f, "uint642float7"); + + union { + uint32_t u; + float f; + } u32f; + +#if !(LIB_PICO_FLOAT_COMPILER || defined(__riscv)) + printf("fix2float\n"); + // todo test correct rounding around maximum precision + test_checkf(fix2float(-3, 1), -1.5f, "fix2float1"); + test_checkf(fix2float(-3, 1), -1.5f, 
"fix2float2"); + test_checkf(fix2float(-3, -4), -48.0f, "fix2float3"); + + printf("ufix2float\n"); + // todo test correct rounding around maximum precision + test_checkf(ufix2float(0xa0000000, 30), 2.5f, "ufix2float1"); + test_checkf(ufix2float(3, -4), 48.0f, "ufix2float2"); + + printf("fix642float\n"); + // todo test correct rounding around maximum precision + test_checkf(fix642float(-0xa000000000ll, 38), -2.5f, "fix6422float1"); + test_checkf(fix642float(-3, -34), -51539607552.0f, "fix642float2"); + + printf("ufix642float\n"); + // todo test correct rounding around maximum precision + test_checkf(ufix642float(0xa000000000ll, 38), 2.5f, "ufix642float1"); + test_checkf(ufix642float(3, -34), 51539607552.0f, "fix64float2"); + + test_checkf(fix2float_8(128), 0.5f, "fix2float_8_1"); + test_checkf(fix2float_8(-128), -0.5f, "fix2float_8_2"); + test_checkf(fix2float_16(8192), 0.125f, "fix2float_8_3"); + test_checkf(fix2float_16(-8192), -0.125f, "fix2float_8_4"); + test_checkf(fix2float_24(3<<23), 1.5f, "fix2float_8_5"); + test_checkf(fix2float_24(-(3<<23)), -1.5f, "fix2float_8_6"); + + printf("float2fix\n"); + test_checki(float2fix(-0.5f, 8), -0x80, "float2fix0"); + test_checki(float2fix(3.5f, 8), 0x380, "float2fix1"); + test_checki(float2fix(-3.5f, 8), -0x380, "float2fix2"); + test_checki(float2fix(32768.0f, 16), INT32_MAX, "float2fix3"); + test_checki(float2fix(65536.0f, 16), INT32_MAX, "float2fix4"); + test_checki(float2fix(-65536.0f, 16), INT32_MIN, "float2fix4b"); + test_checki(float2fix(INFINITY, 16), INT32_MAX, "float2fix5"); + test_checki(float2fix(-INFINITY, 16), INT32_MIN, "float2fix5b"); + test_checki(float2fix(3.24999f, 2), 12, "float2fix6"); + test_checki(float2fix(3.25f, 2), 13, "float2fix7"); + test_checki(float2fix(-3.24999f, 2), -13, "float2fix8"); + test_checki(float2fix(-3.25f, 2), -13, "float2fix9"); + test_checki(float2fix(-0.75f, 1), -2, "float2fix10"); + test_checki(float2fix(-3.0f, -1), -2, "float2fix11"); // not very useful + u32f.u = 0x7f012345; 
+ test_checki(float2fix(u32f.f, 1), INT32_MAX, "float2fix12"); + u32f.u = 0xff012345; + test_checki(float2fix(u32f.f, 1), INT32_MIN, "float2fix13"); + + printf("float2ufix\n"); + test_checku(float2ufix(3.5f, 8), 0x380, "float2ufix1"); + test_checku(float2ufix(-3.5f, 8), 0, "float2ufix2"); + test_checku(float2ufix(32768.0f, 16), 32768 << 16, "float2ufix3"); + test_checku(float2ufix(65536.0f, 16), UINT32_MAX, "float2ufix4"); + test_checku(float2ufix(INFINITY, 16), UINT32_MAX, "float2ufix5"); + test_checku(float2ufix(3.24999f, 2), 12, "float2ufix6"); + test_checku(float2ufix(3.25f, 2), 13, "float2ufix7"); + test_checku(float2ufix(3.0f, -1), 1, "float2ufix8"); // not very useful + + printf("float2fix64\n"); + test_checki64(float2fix64(3.5f, 8), 0x380, "float2fix641"); + test_checki64(float2fix64(-3.5f, 8), -0x380, "float2fix642"); + test_checki64(float2fix64(32768.0f, 16), 32768ll << 16, "float2fix643"); + test_checki64(float2fix64(65536.0f, 16), 65536ll << 16, "float2fix644"); + test_checki64(float2fix64(2147483648.0f, 16), 2147483648ll << 16, "float2ufix644b"); + test_checki64(float2fix64(65536.0f * 65536.0f * 32768.0f, 16), INT64_MAX, "float2fix644c"); + test_checki64(float2fix64(INFINITY, 16), INT64_MAX, "float2fix645"); + test_checki64(float2fix64(3.24999f, 2), 12, "float2fix646"); + test_checki64(float2fix64(3.25f, 2), 13, "float2fix647"); + test_checki64(float2fix64(-3.24999f, 2), -13, "float2fix648"); + test_checki64(float2fix64(-3.25f, 2), -13, "float2fix649"); + test_checki64(float2fix64(-3.0f, -1), -2, "float2fix6410"); // not very useful + + printf("float2ufix64\n"); + test_checku64(float2ufix64(3.5f, 8), 0x380, "float2ufix641"); + test_checku64(float2ufix64(-3.5f, 8), 0, "float2ufix642"); + test_checku64(float2ufix64(32768.0f, 16), 32768ull << 16, "float2ufix643"); + test_checku64(float2ufix64(65536.0f, 16), 65536ull << 16, "float2ufix644"); + test_checku64(float2ufix64(2147483648.0f, 16), 2147483648ull << 16, "float2ufix644b"); + 
test_checku64(float2ufix64(INFINITY, 16), UINT64_MAX, "float2ufix645"); + test_checku64(float2ufix64(3.24999f, 2), 12, "float2ufix646"); + test_checku64(float2ufix64(3.25f, 2), 13, "float2ufix647"); + test_checku64(float2ufix64(3.0f, -1), 1, "float2ufix648"); // not very useful + + printf("float2fix_z\n"); + test_checki(float2fix_z(3.5f, 8), 0x380, "float2fix_z1"); + test_checki(float2fix_z(-3.5f, 8), -0x380, "float2fix_z2"); + test_checki(float2fix_z(32768.0f, 16), INT32_MAX, "float2fix_z3"); + test_checki(float2fix_z(65536.0f, 16), INT32_MAX, "float2fix_z4"); + test_checki(float2fix_z(INFINITY, 16), INT32_MAX, "float2fix_z5"); + test_checki(float2fix_z(-INFINITY, 16), INT32_MIN, "float2fix_z5b"); + test_checki(float2fix_z(3.24999f, 2), 12, "float2fix_z6"); + test_checki(float2fix_z(3.25f, 2), 13, "float2fix_z7"); + test_checki(float2fix_z(-3.24999f, 2), -12, "float2fix_z8"); + test_checki(float2fix_z(-3.25f, 2), -13, "float2fix_z9"); + test_checki(float2fix_z(-0.75f, 1), -1, "float2fix_z10"); + test_checki(float2fix_z(-3.0f, -1), -1, "float2fix_z11"); // not very useful + u32f.u = 0x7f012345; + test_checki(float2fix_z(u32f.f, 1), INT32_MAX, "float2fix_z12"); + u32f.u = 0xff012345; + test_checki(float2fix_z(u32f.f, 1), INT32_MIN, "float2fix_z13"); + + printf("float2ufix_z\n"); + test_checku(float2ufix_z(3.5f, 8), 0x380, "float2ufix_z1"); + test_checku(float2ufix_z(-3.5f, 8), 0, "float2ufix_z2"); + test_checku(float2ufix_z(32768.0f, 16), 32768 << 16, "float2ufix_z3"); + test_checku(float2ufix_z(65536.0f, 16), UINT32_MAX, "float2ufix_z4"); + test_checku(float2ufix_z(INFINITY, 16), UINT32_MAX, "float2ufix_z5"); + test_checku(float2ufix_z(3.24999f, 2), 12, "float2ufix_z6"); + test_checku(float2ufix_z(3.25f, 2), 13, "float2ufix_z7"); + test_checku(float2ufix_z(3.0f, -1), 1, "float2ufix_z8"); // not very useful + u32f.u = 0x7f012345; + test_checku(float2ufix_z(u32f.f, 1), UINT32_MAX, "float2fix_z9"); + u32f.u = 0xff012345; + test_checku(float2ufix_z(u32f.f, 1), 0, 
"float2fix_z10"); + + printf("float2fix64_z\n"); + test_checki64(float2fix64_z(3.5f, 8), 0x380, "float2fix64_z1"); + test_checki64(float2fix64_z(-3.5f, 8), -0x380, "float2fix64_z2"); + test_checki64(float2fix64_z(32768.0f, 16), 32768ll << 16, "float2fix64_z3"); + test_checki64(float2fix64_z(65536.0f, 16), 65536ll << 16, "float2fix64_z4"); + test_checki64(float2fix64_z(65536.0f * 65536.0f * 32768.0f, 16), INT64_MAX, "float2fix64_z4b"); + test_checki64(float2fix64_z(INFINITY, 16), INT64_MAX, "float2fix64_z5"); + test_checki64(float2fix64_z(3.24999f, 2), 12, "float2fix64_z6"); + test_checki64(float2fix64_z(3.25f, 2), 13, "float2fix64_z7"); + test_checki64(float2fix64_z(-3.24999f, 2), -12, "float2fix64_z8"); + test_checki64(float2fix64_z(-3.25f, 2), -13, "float2fix64_z9"); + test_checki64(float2fix64_z(-3.0f, -1), -1, "float2fix64_z10"); // not very useful + + printf("float2ufix64_z\n"); + test_checku64(float2ufix64_z(3.5f, 8), 0x380, "float2ufix64_z1"); + test_checku64(float2ufix64_z(-3.5f, 8), 0, "float2ufix64_z2"); + test_checku64(float2ufix64_z(32768.0f, 16), 32768ll << 16, "float2ufix64_z3"); + test_checku64(float2ufix64_z(65536.0f, 16), 65536ll << 16, "float2ufix64_z4"); + test_checki64(float2ufix64_z(65536.0f * 65536.0f * 65536.0f, 16), UINT64_MAX, "float2fix64_z4b"); + test_checku64(float2ufix64_z(INFINITY, 16), UINT64_MAX, "float2ufix64_z5"); + test_checku64(float2ufix64_z(3.24999f, 2), 12, "float2ufix64_z6"); + test_checku64(float2ufix64_z(3.25f, 2), 13, "float2ufix64_z7"); + test_checki64(float2ufix64_z(3.0f, -1), 1, "float2fuix64_z8"); // not very useful + + printf("float2int\n"); + test_checki(float2int(0.0f), 0, "float2int1"); + test_checki(float2int(0.25f), 0, "float2int1b"); + test_checki(float2int(0.5f), 0, "float2int2"); + test_checki(float2int(0.75f), 0, "float2int2b"); + test_checki(float2int(1.0f), 1, "float2int3"); + test_checki(float2int(-10.0f), -10, "float2int3a"); + test_checki(float2int(-0.0f), 0, "float2int3b"); + 
test_checki(float2int(-0.25f), -1, "float2int4"); + test_checki(float2int(-0.5f), -1, "float2int4b"); + test_checki(float2int(-0.75f), -1, "float2int5"); + test_checki(float2int(-1.0f), -1, "float2int5b"); + // todo test correct rounding around maximum precision + test_checki(float2int(2147483647.0f), INT32_MAX, "float2int6"); + test_checki(float2int(21474836470.0f), INT32_MAX, "float2int7"); + test_checki(float2int(-2147483648.0f), INT32_MIN, "float2int8"); + test_checki(float2int(-21474836480.0f), INT32_MIN, "float2int9"); + test_checki(float2int(-2.5f), -3, "float2int10"); + test_checki(float2int(-2.4f), -3, "float2int11"); + + printf("float2uint\n"); + test_checku(float2uint(0.0f), 0, "float2uint1"); + test_checku(float2uint(0.25f), 0, "float2uint2"); + test_checku(float2uint(0.5f), 0, "float2uint3"); + test_checku(float2uint(0.75f), 0, "float2uint4"); + test_checku(float2uint(1.0f), 1, "float2uint5"); + test_checku(float2uint(2147483647.0f), INT32_MAX+1u, "float2uint6"); // note loss of precision + test_checku(float2uint(2147483648.0f), INT32_MAX+1u, "float2uint7"); + test_checku(float2uint(4294967294.5f), UINT32_MAX, "float2uint8"); // note loss of precision + test_checku(float2uint(4294967295.0f), UINT32_MAX, "float2uint9"); + test_checku(float2uint(42949672950.0f), UINT32_MAX, "float2uint10"); + + printf("float2int64\n"); + test_checki64(float2int64(0.0f), 0, "float2int641"); + test_checki64(float2int64(0.25f), 0, "float2int641b"); + test_checki64(float2int64(0.5f), 0, "float2int642"); + test_checki64(float2int64(0.75f), 0, "float2int642b"); + test_checki64(float2int64(1.0f), 1, "float2int643"); + test_checki64(float2int64(-10.0f), -10, "float2int643a"); + test_checki64(float2int64(-0.0f), 0, "float2int643b"); + test_checki64(float2int64(-0.25f), -1, "float2int644"); + test_checki64(float2int64(-0.5f), -1, "float2int644b"); + test_checki64(float2int64(-0.75f), -1, "float2int645"); + test_checki64(float2int64(-1.0f), -1, "float2int645b"); + // todo test 
correct rounding around maximum precision + test_checki64(float2int64(2147483647.0f), INT32_MAX+1ll, "float2int646"); + test_checki64(float2int64(21474836470.0f), 21474836480ll, "float2int647"); // note loss of precision + test_checki64(float2int64(-2147483648.0f), INT32_MIN, "float2int648"); + test_checki64(float2int64(-21474836480.0f), -21474836480ll, "float2int649"); + test_checki64(float2int64(-2.5f), -3, "float2int6410"); + test_checki64(float2int64(-2.4f), -3, "float2int6411"); + + printf("float2uint64\n"); + test_checku64(float2uint64(0.0f), 0, "float2uint641"); + test_checku64(float2uint64(0.25f), 0, "float2uint642"); + test_checku64(float2uint64(0.5f), 0, "float2uint643"); + test_checku64(float2uint64(0.75f), 0, "float2uint644"); + test_checku64(float2uint64(1.0f), 1, "float2uint645"); + test_checku64(float2uint64(2147483647.0f), INT32_MAX+1u, "float2uint646"); // note loss of precision + test_checku64(float2uint64(2147483648.0f), INT32_MAX+1u, "float2uint647"); + test_checku64(float2uint64(4294967294.5f), 4294967296ull, "float2uint648"); // note loss of precision + test_checku64(float2uint64(4294967295.0f), 4294967296ull, "float2uint649"); // note loss of precision + test_checku64(float2uint64(42949672950.0f), 42949672960ull, "float2uint6410"); // note loss of precision +#endif + + // // These methods round towards 0. 
+ printf("float2int_z\n"); + test_checki(float2int_z(0.0f), 0, "float2int_z1"); + test_checki(float2int_z(0.25f), 0, "float2int_z1b"); + test_checki(float2int_z(0.5f), 0, "float2int_z2"); + test_checki(float2int_z(0.75f), 0, "float2int_z2b"); + test_checki(float2int_z(1.0f), 1, "float2int_z3"); + test_checki(float2int_z(-10.0f), -10, "float2int_z3a"); + test_checki(float2int_z(-0.0f), 0, "float2int_z3b"); + test_checki(float2int_z(-0.25f), 0, "float2int_z4"); + test_checki(float2int_z(-0.5f), 0, "float2int_z4b"); + test_checki(float2int_z(-0.75f), 0, "float2int_z5"); + test_checki(float2int_z(-1.0f), -1, "float2int_z5b"); + // todo test correct rounding around maximum precision + test_checki(float2int_z(2147483647.0f), INT32_MAX, "float2int_z6"); + test_checki(float2int_z(21474836470.0f), INT32_MAX, "float2int_z7"); + test_checki(float2int_z(-2147483648.0f), INT32_MIN, "float2int_z8"); + test_checki(float2int_z(-21474836480.0f), INT32_MIN, "float2int_z9"); + test_checki(float2int_z(-2.5f), -2, "float2int_z10"); + test_checki(float2int_z(-2.4f), -2, "float2int_z11"); + + printf("float2int64_z\n"); + test_checki64(float2int64_z(0.0f), 0, "float2int64_z1"); + test_checki64(float2int64_z(0.25f), 0, "float2int64_z1b"); + test_checki64(float2int64_z(0.5f), 0, "float2int64_z2"); + test_checki64(float2int64_z(0.75f), 0, "float2int64_z2b"); + test_checki64(float2int64_z(1.0f), 1, "float2int64_z3"); + test_checki64(float2int64_z(-10.0f), -10, "float2int64_z3a"); + test_checki64(float2int64_z(-0.0f), 0, "float2int64_z3b"); + test_checki64(float2int64_z(-0.25f), 0, "float2int64_z4"); + test_checki64(float2int64_z(-0.5f), 0, "float2int64_z4b"); + test_checki64(float2int64_z(-0.75f), 0, "float2int64_z5"); + test_checki64(float2int64_z(-1.0f), -1, "float2int64_z5b"); + test_checki64(float2int64_z(2147483647.0f), 2147483648ll, "float2int64_z6"); // note loss of precision + test_checki64(float2int64_z(21474836470.0f), 21474836480ll, "float2int64_z7"); // note loss of precision + 
test_checki64(float2int64_z(-2147483648.0f), INT32_MIN, "float2int64_z8"); + test_checki64(float2int64_z(-21474836480.0f), -21474836480ll, "float2int64_z9"); + test_checki64(float2int64_z(-2.5f), -2, "float2int64_z10"); + test_checki64(float2int64_z(-2.4f), -2, "float2int64_z11"); + + printf("float2uint_z\n"); + test_checku(float2uint_z(0.0f), 0, "float2uint_z1"); + test_checku(float2uint_z(0.25f), 0, "float2uint_z2"); + test_checku(float2uint_z(0.5f), 0, "float2uint_z3"); + test_checku(float2uint_z(0.75f), 0, "float2uint_z4"); + test_checku(float2uint_z(1.0f), 1, "float2uint_z5"); + test_checku(float2uint_z(2147483647.0f), INT32_MAX+1u, "float2uint_z6"); // note loss of precision + test_checku(float2uint_z(2147483648.0f), INT32_MAX+1u, "float2uint_z7"); + // todo test correct rounding around maximum precision + test_checku(float2uint_z(4294967294.5f), UINT32_MAX, "float2uint_z8"); // note loss of precision + test_checku(float2uint_z(4294967295.0f), UINT32_MAX, "float2uint_z9"); + test_checku(float2uint_z(42949672950.0f), UINT32_MAX, "float2uint_z10"); + + printf("float2uint64_z\n"); + test_checku64(float2uint64_z(0.0f), 0, "float2uint64_z1"); + test_checku64(float2uint64_z(0.25f), 0, "float2uint64_z2"); + test_checku64(float2uint64_z(0.5f), 0, "float2uint64_z3"); + test_checku64(float2uint64_z(0.75f), 0, "float2uint64_z4"); + test_checku64(float2uint64_z(1.0f), 1, "float2uint64_z5"); + test_checku64(float2uint64_z(2147483647.0f), INT32_MAX+1u, "float2uint64_z6"); // note loss of precision + test_checku64(float2uint64_z(2147483648.0f), INT32_MAX+1u, "float2uint64_z7"); + test_checku64(float2uint64_z(4294967294.5f), 4294967296ull, "float2uint64_z8"); // note loss of precision + test_checku64(float2uint64_z(4294967295.0f), 4294967296ull, "float2uint64_z9"); // note loss of precision + test_checku64(float2uint64_z(42949672950.0f), 42949672960ull, "float2uint64_z10"); // note loss of precision + + // float exp10f(float x); + // void sincosf(float x, float *sinx, float 
*cosx); + // float powintf(float x, int y); + return rc; +} + +int main() { + stdio_init_all(); + int rc = test(); + if (rc) { + printf("FAILED\n"); + } else { + printf("PASSED\n"); + } +}