mirror of
https://github.com/raspberrypi/pico-sdk.git
synced 2025-08-07 17:02:52 +03:00
rationalize pico_float/pico_double libraries (#2208)
* on RP2350 _dcp variant now enables -msoft-float, since if you're using this at all it is likely because you don't want to use the VFP unit at all (to save stack space) * implement all float_ and double_ conversion functions in all pico_float_pico_ variants and pico_double_pico on RP2040 and RP2350 (many were missing in some combinations) * provide better granularity of what functions are wrapped in each case also marked custom_xxx_funcs_test.c as not in bazel build yet
This commit is contained in:
@@ -535,7 +535,7 @@ static inline void dma_channel_start(uint channel) {
|
||||
*\endcode
|
||||
*
|
||||
* \if rp2350_specific
|
||||
* RP2350 only: Due to errata RP12350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of
|
||||
* RP2350 only: Due to errata RP2350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of
|
||||
* the aborted channel and any chained channels prior to the abort to prevent re-triggering.
|
||||
* \endif
|
||||
*
|
||||
|
@@ -7,7 +7,7 @@
|
||||
#include "pico/asm_helper.S"
|
||||
|
||||
#if !HAS_DOUBLE_COPROCESSOR
|
||||
#error attempt to compile double_aeabi_rp2350 when there is no DCP
|
||||
#error attempt to compile double_aeabi_dcp when there is no DCP
|
||||
#else
|
||||
|
||||
#include "hardware/dcp_instr.inc.S"
|
||||
@@ -29,7 +29,7 @@ double_section WRAPPER_FUNC_NAME(\func)
|
||||
|
||||
// ============== STATE SAVE AND RESTORE ===============
|
||||
|
||||
.macro saving_func type func
|
||||
.macro saving_func type func, opt_label1='-', opt_label2='-'
|
||||
// Note we are usually 32-bit aligned already at this point, as most of the
|
||||
// function bodies contain exactly two 16-bit instructions: bmi and bx lr.
|
||||
// We want the PCMP word-aligned.
|
||||
@@ -41,6 +41,12 @@ double_section WRAPPER_FUNC_NAME(\func)
|
||||
push {lr} // 16-bit instruction
|
||||
bl generic_save_state // 32-bit instruction
|
||||
b 1f // 16-bit instruction
|
||||
.ifnc \opt_label1,'-'
|
||||
regular_func \opt_label1
|
||||
.endif
|
||||
.ifnc \opt_label2,'-'
|
||||
regular_func \opt_label2
|
||||
.endif
|
||||
// This is the actual entry point:
|
||||
\type\()_func \func
|
||||
PCMP apsr_nzcv
|
||||
@@ -128,53 +134,124 @@ saving_func wrapper sqrt
|
||||
dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
|
||||
saving_func_return
|
||||
|
||||
// todo not a real thing
|
||||
double_wrapper_section __aeabi_dclassify
|
||||
saving_func wrapper __aeabi_dclassify
|
||||
@ with correct rounding
|
||||
double_section dclassify
|
||||
saving_func regular dclassify
|
||||
dcp_dclassify_m apsr_nzcv,r0,r1
|
||||
saving_func_return
|
||||
|
||||
// ============== CONVERSION FUNCTIONS ===============
|
||||
|
||||
double_wrapper_section __aeabi_d2f
|
||||
saving_func wrapper __aeabi_d2f
|
||||
saving_func wrapper __aeabi_d2f double2float
|
||||
@ with rounding
|
||||
dcp_double2float_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_i2d
|
||||
saving_func wrapper __aeabi_i2d
|
||||
saving_func wrapper __aeabi_i2d int2double
|
||||
dcp_int2double_m r0,r1,r0
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_ui2d
|
||||
saving_func wrapper __aeabi_ui2d
|
||||
saving_func wrapper __aeabi_ui2d uint2double
|
||||
dcp_uint2double_m r0,r1,r0
|
||||
saving_func_return
|
||||
|
||||
double_section double2fix_z
|
||||
saving_func regular double2fix_z
|
||||
ubfx r3, r1, #20, #11
|
||||
adds r3, r2
|
||||
beq 1f // very small; we don't care that we might make a denormal
|
||||
asrs ip, r3, #11
|
||||
beq 1f
|
||||
ite pl
|
||||
movpl r3, #0x7ff
|
||||
movsmi r3, #0
|
||||
1:
|
||||
bfi r1, r3, #20, #11
|
||||
b double2int_z_entry
|
||||
|
||||
double_section double2ufix
|
||||
saving_func regular double2ufix_z double2ufix
|
||||
double2ufix_z_entry:
|
||||
ubfx r3, r1, #20, #11
|
||||
adds r3, r2
|
||||
beq 1f // very small; we don't care that we might make a denormal
|
||||
asrs ip, r3, #11
|
||||
beq 1f
|
||||
ite pl
|
||||
lsrspl r3, r1, #20 // 0x7ff
|
||||
movsmi r3, #0
|
||||
1:
|
||||
bfi r1, r3, #20, #11
|
||||
b double2uint_z_entry
|
||||
|
||||
double_section double2fix
|
||||
saving_func regular double2fix
|
||||
ubfx r3, r1, #20, #11
|
||||
cbz r3, 2f // 0 or denormal
|
||||
adds r3, r2
|
||||
beq 1f // very small; we don't care that we might make a denormal
|
||||
asrs ip, r3, #11
|
||||
beq 1f
|
||||
ite pl
|
||||
movpl r3, #0x7ff
|
||||
movsmi r3, #0
|
||||
1:
|
||||
bfi r1, r3, #20, #11
|
||||
b double2int_entry
|
||||
2:
|
||||
movs r0, #0
|
||||
saving_func_return
|
||||
|
||||
|
||||
double_section double2int
|
||||
// double2int: convert the double in r0:r1 (r1 = high word) to a 32-bit signed integer,
// rounding towards -infinity. Inputs that need no correction are handed straight to
// double2int_z_entry; negative non-integers are converted there-style and then decremented.
saving_func regular double2int
|
||||
double2int_entry:
|
||||
lsls r2, r1, #1 // shift the sign bit out into carry; r2 = exponent and upper mantissa bits
|
||||
bcc double2int_z_entry // positive is ok for int_z
|
||||
lsrs r3, r2, #21 // r3 = 11-bit exponent field
|
||||
beq double2int_z_entry // 0 or -0 or denormal is ok for int_z
|
||||
|
||||
lsrs r2, #21 // r2 = 11-bit exponent field
|
||||
adds r2, #1
|
||||
subs r2, r2, #0x400 // r2 = exponent - 0x3ff; borrow (carry clear) when the exponent is below 0x3ff
|
||||
bcc 1f // <1 means subtract 1
|
||||
cmp r2, #31
|
||||
bge double2int_z_entry // must be an integer or maxed out
|
||||
lsls r3, r1, #12 // discard sign and exponent from the high word
|
||||
adds r3, r3, r0, lsr #20 // r3 now has highest 32 mantissa bits
|
||||
lsls r3, r2 // shift out the mantissa bits that belong to the integer part
|
||||
orrs r3, r3, r0, lsl #12 // these bits are all guaranteed to be in the fraction
|
||||
beq double2int_z_entry // integer
|
||||
1:
|
||||
// negative input with a non-zero fraction: convert, then subtract 1 from the result
|
||||
dcp_double2int_m r0,r0,r1
|
||||
subs r0, #1
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_d2iz
|
||||
saving_func wrapper __aeabi_d2iz
|
||||
saving_func wrapper __aeabi_d2iz double2int_z
|
||||
double2int_z_entry:
|
||||
@ with truncation towards 0
|
||||
dcp_double2int_m r0,r0,r1
|
||||
// note: this works with either saved or not saved call as it is just a `bx lr`
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_d2uiz
|
||||
saving_func wrapper __aeabi_d2uiz
|
||||
saving_func wrapper __aeabi_d2uiz double2uint double2uint_z
|
||||
double2uint_z_entry:
|
||||
@ with truncation towards 0
|
||||
dcp_double2uint_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
// todo not a real thing
|
||||
double_wrapper_section __aeabi_d2i_r
|
||||
saving_func wrapper __aeabi_d2i_r
|
||||
double_section double2int_r
|
||||
saving_func regular double2int_r
|
||||
@ with rounding
|
||||
dcp_double2int_r_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
// todo not a real thing
|
||||
double_wrapper_section __aeabi_d2ui_r
|
||||
saving_func wrapper __aeabi_d2ui_r
|
||||
double_section double2uint_r
|
||||
saving_func regular double2uint_r
|
||||
@ with rounding
|
||||
dcp_double2uint_r_m r0,r0,r1
|
||||
saving_func_return
|
||||
@@ -189,7 +266,6 @@ saving_func wrapper __aeabi_dcmpun
|
||||
saving_func_return
|
||||
|
||||
double_wrapper_section __aeabi_dcmp
|
||||
|
||||
saving_func wrapper __aeabi_cdrcmple
|
||||
dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
|
||||
bvs cmp_nan
|
||||
|
@@ -425,6 +425,7 @@ double_wrapper_section __aeabi_ui2d
|
||||
double_wrapper_section __aeabi_i2d
|
||||
|
||||
wrapper_func __aeabi_ui2d
|
||||
regular_func uint2double
|
||||
movs r1, #0
|
||||
cmp r0, #0
|
||||
bne 2f
|
||||
@@ -432,6 +433,7 @@ wrapper_func __aeabi_ui2d
|
||||
bx lr
|
||||
// double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion
|
||||
wrapper_func __aeabi_i2d
|
||||
regular_func int2double
|
||||
asrs r1, r0, #31
|
||||
eors r0, r1
|
||||
subs r0, r1
|
||||
@@ -506,6 +508,7 @@ regular_func double2int
|
||||
// unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3]
|
||||
double_wrapper_section __aeabi_d2uiz
|
||||
wrapper_func __aeabi_d2uiz
|
||||
regular_func double2uint_z
|
||||
regular_func double2uint
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim
|
||||
|
||||
@@ -528,11 +531,13 @@ regular_func ufix642double
|
||||
// double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion
|
||||
double_wrapper_section __aeabi_l2d
|
||||
wrapper_func __aeabi_l2d
|
||||
regular_func int642double
|
||||
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim
|
||||
|
||||
// double FUNC_NAME(__aeabi_ul2d)(unsigned long long) unsigned long long to double (double precision) conversion
|
||||
double_wrapper_section __aeabi_ul2d
|
||||
wrapper_func __aeabi_ul2d
|
||||
regular_func uint642double
|
||||
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim
|
||||
|
||||
// long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3]
|
||||
@@ -566,22 +571,106 @@ regular_func double2int64
|
||||
// unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3]
|
||||
double_wrapper_section __aeabi_d2ulz
|
||||
wrapper_func __aeabi_d2ulz
|
||||
regular_func double2uint64
|
||||
regular_func double2uint64_z
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim
|
||||
|
||||
double_section double2fix64_z
|
||||
regular_func double2fix64_z
|
||||
lsls r3, r1, #1
|
||||
bcc double2fix64 // input positive is ok for fix64
|
||||
mov ip, r2
|
||||
asrs r2, r3, #21
|
||||
beq 3f // input zero or denormal, so just return zero
|
||||
adds r2, #1
|
||||
beq double2fix64 // input infinite/nan is ok for fix64
|
||||
|
||||
lsrs r3, #21
|
||||
add r3, ip
|
||||
movs r2, #1
|
||||
negs r2, r2
|
||||
lsrs r2, #22
|
||||
subs r3, r2 // r3 = modified e - 0x3ff
|
||||
|
||||
bcc 3f // modified input < 1.0 means result is zero
|
||||
cmp r3, #52
|
||||
bge 2f // modified input must be an integer or infinite
|
||||
|
||||
adds r3, #12
|
||||
mov r2, r1
|
||||
lsls r2, r2, r3 // r2 has remaining fractional mantissa bits of r1
|
||||
bne 1f // not integer as non zero fractional bits remain
|
||||
subs r3, #32
|
||||
asrs r2, r3, #31
|
||||
bics r3, r3, r2
|
||||
movs r2, r0
|
||||
lsls r2, r2, r3
|
||||
bne 1f // remaining fractional bits are non-zero, so argument was not an integer
|
||||
2:
|
||||
// integer
|
||||
mov r2, ip
|
||||
b double2fix64
|
||||
3: // result is zero
|
||||
movs r0, #0
|
||||
movs r1, #0
|
||||
bx lr
|
||||
1:
|
||||
push {lr}
|
||||
mov r2, ip
|
||||
bl double2fix64
|
||||
movs r2, #0
|
||||
adds r0, #1
|
||||
adcs r1, r2
|
||||
pop {pc}
|
||||
|
||||
double_section double2fix64
|
||||
regular_func double2fix64
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim
|
||||
|
||||
double_section double2ufix64
|
||||
regular_func double2ufix64
|
||||
regular_func double2ufix64_z
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim
|
||||
|
||||
double_section double2fix
|
||||
regular_func double2fix
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim
|
||||
|
||||
double_section double2fix_z
|
||||
// double2fix_z: r0:r1 = double (r1 = high word), r2 = exponent adjustment e.
// Adds e to the exponent of the input and tail-calls double2int_z; returns 0 when the
// (adjusted) input is zero/denormal, and a saturated value when it is infinite/NaN or
// the adjusted exponent overflows.
regular_func double2fix_z
|
||||
lsls r3, r1, #1 // drop the sign bit
|
||||
asrs r3, #21 // r3 = exponent field, sign-extended (0 if all zeros, -1 if all ones)
|
||||
beq 2f // input is zero or denormal
|
||||
adds r3, #1
|
||||
beq 3f // input is infinite or nan
|
||||
|
||||
// extract exponent again
|
||||
lsls r3, r1, #1
|
||||
lsrs r3, #21
|
||||
// adjust
|
||||
adds r3, r2
|
||||
ble 2f // adjusted input is zero or denormal or < 1
|
||||
lsrs r3, r3, #11
|
||||
bne 3f // adjusted exponent no longer fits in 11 bits: treat as infinite
|
||||
|
||||
lsls r2, r2, #20 // align exponent adjustment offset
|
||||
adds r1, r1, r2 // we know adjustment is safe
|
||||
b double2int_z
|
||||
2:
|
||||
// result is zero
|
||||
movs r0, #0
|
||||
bx lr
|
||||
3:
|
||||
// saturate: build 0x7fffffff, then flip every bit if the input was negative (giving 0x80000000)
|
||||
movs r0, #0
|
||||
subs r0, #1
|
||||
lsrs r0, #1 // r0 = 0x7fffffff
|
||||
asrs r1, #31 // r1 = 0 for a positive input, 0xffffffff for a negative one
|
||||
eors r0, r1
|
||||
bx lr
|
||||
|
||||
double_section double2ufix
|
||||
regular_func double2ufix
|
||||
regular_func double2ufix_z
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim
|
||||
|
||||
double_wrapper_section __aeabi_d2f
|
||||
|
@@ -249,7 +249,69 @@ regular_func ufix2double
|
||||
movs r1,#0
|
||||
bx r14
|
||||
|
||||
double_wrapper_section conv_dtoi64
|
||||
double_section conv_dtoi64
|
||||
regular_func double2int64
|
||||
lsls r3, r1, #1
|
||||
bcc double2int64_z // input positive is ok for int64_z
|
||||
cmp r3, #0xffe00000
|
||||
bcs double2int64_z // input is infinite
|
||||
lsrs r3, #21
|
||||
beq 2f // input zero or denormal, means answer remains zero
|
||||
sub r3, #0x3ff
|
||||
cmp r3, #0
|
||||
blt 1f // input is less than 1.0
|
||||
cmp r3, #52
|
||||
bge double2int64_z // modified input must be an integer or infinite
|
||||
adds r3, #12
|
||||
lsls r2, r1, r3 // r2 has remaining fractional mantissa bits of r1
|
||||
bne 1f // not integer as non zero fractional bits remain
|
||||
subs r3, #32
|
||||
bics r3, r3, r3, asr #31 // map negative shift to zero
|
||||
lsls r3, r0, r3
|
||||
beq double2int64_z // remaining fractional bits are 0, so argument was an integer
|
||||
1:
|
||||
push {lr}
|
||||
bl double2int64_z
|
||||
subs r0, #1
|
||||
sbcs r1, r1, #0
|
||||
pop {pc}
|
||||
2:
|
||||
movs r0, #0
|
||||
movs r1, #0
|
||||
bx lr
|
||||
|
||||
double_section conv_dtofix64
|
||||
regular_func double2fix64
|
||||
lsls r3, r1, #1
|
||||
bcc double2fix64_z // input positive is ok for fix64_z
|
||||
cmp r3, #0xffe00000
|
||||
bcs double2fix64_z // input is infinite
|
||||
lsrs r3, #21
|
||||
beq 2f // input zero or denormal, means answer remains zero
|
||||
sub r3, #0x3ff
|
||||
adds r3, r2
|
||||
blt 1f // modified input zero or denormal, or less than 1.0
|
||||
cmp r3, #52
|
||||
bge double2fix64_z // modified input must be an integer or infinite
|
||||
adds r3, #12
|
||||
lsls ip, r1, r3 // ip has remaining fractional mantissa bits of r1
|
||||
bne 1f // not integer as non zero fractional bits remain
|
||||
subs r3, #32
|
||||
bics r3, r3, r3, asr #31 // map negative shift to zero
|
||||
lsls r3, r0, r3
|
||||
beq double2fix64_z // remaining fractional bits are 0, so argument was an integer
|
||||
1:
|
||||
push {lr}
|
||||
bl double2fix64_z
|
||||
subs r0, #1
|
||||
sbcs r1, r1, #0
|
||||
pop {pc}
|
||||
2:
|
||||
movs r0, #0
|
||||
movs r1, #0
|
||||
bx lr
|
||||
|
||||
double_wrapper_section conv_dtoi64_z
|
||||
|
||||
@ convert double to signed int64, rounding towards 0, clamping
|
||||
wrapper_func __aeabi_d2lz
|
||||
|
@@ -582,7 +582,7 @@ wrapper_func fma
|
||||
saving_func_return
|
||||
|
||||
|
||||
double_wrapper_section __dmla
|
||||
double_section fma_fast
|
||||
@ cf saving_func macro: but here we need to record the SP before the state save possibly changes it
|
||||
1:
|
||||
push {lr} // 16-bit instruction
|
||||
@@ -592,6 +592,7 @@ double_wrapper_section __dmla
|
||||
@ r0:r1 m
|
||||
@ r2:r3 n
|
||||
@ [r13,#0] a
|
||||
regular_func fma_fast
|
||||
regular_func mla
|
||||
mov r12,sp @ save the SP
|
||||
PCMP apsr_nzcv @ test the engaged flag
|
||||
|
@@ -16,50 +16,153 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
/** \file double.h
|
||||
* \defgroup pico_double pico_double
|
||||
* \defgroup pico_double pico_double
|
||||
*
|
||||
* \brief Optimized double-precision floating point functions
|
||||
*
|
||||
* (Replacement) optimized implementations are provided of the following compiler built-ins
|
||||
* and math library functions:
|
||||
* An application can take control of the floating point routines used in the application over and above what is provided by the compiler,
|
||||
* by depending on the pico_double library. A user might want to do this:
|
||||
*
|
||||
* - __aeabi_dadd, __aeabi_ddiv, __aeabi_dmul, __aeabi_drsub, __aeabi_dsub, __aeabi_cdcmpeq, __aeabi_cdrcmple, __aeabi_cdcmple, __aeabi_dcmpeq, __aeabi_dcmplt, __aeabi_dcmple, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmpun, __aeabi_i2d, __aeabi_l2d, __aeabi_ui2d, __aeabi_ul2d, __aeabi_d2iz, __aeabi_d2lz, __aeabi_d2uiz, __aeabi_d2ulz, __aeabi_d2f
|
||||
* - sqrt, cos, sin, tan, atan2, exp, log, ldexp, copysign, trunc, floor, ceil, round, asin, acos, atan, sinh, cosh, tanh, asinh, acosh, atanh, exp2, log2, exp10, log10, pow,, hypot, cbrt, fmod, drem, remainder, remquo, expm1, log1p, fma
|
||||
* - powint, sincos (GNU extensions)
|
||||
* 1. To use optimized software implementations provided by the RP2-series device's bootrom or the SDK
|
||||
* 2. To use optimized combined software/hardware implementations utilizing custom RP2-series hardware for acceleration
|
||||
* 3. To control the amount of C compiler/library code bloat
|
||||
* 4. To make sure no floating point is called at all
|
||||
*
|
||||
* The following additional optimized functions are also provided:
|
||||
* The pico_double library comes in three main flavors:
|
||||
*
|
||||
* - int2double, uint2double, int642double, uint642double, fix2double, ufix2double, fix642double, ufix642double
|
||||
* - double2fix, double2ufix, double2fix64, double2ufix64, double2int, double2uint, double2int64, double2uint64, double2int_z, double2int64_z,
|
||||
* - exp10, sincos, powint
|
||||
* 1. `pico_double_none` - all floating point operations cause a \ref panic - no double-precision floating point code is included
|
||||
* 2. `pico_double_compiler` - no custom functions are provided; all double-precision floating point is handled by the C compiler/library
|
||||
* 3. `pico_double_pico` - the smallest and fastest available for the platform, along with additional functionality (e.g. fixed point conversions) which are detailed below
|
||||
*
|
||||
* On RP2350 the following additional functions are available; the _fast methods are faster but do not round correctly"
|
||||
* The user can control which version they want (e.g. **pico_double_xxx**) by either setting the CMake global variable
|
||||
* `PICO_DEFAULT_DOUBLE_IMPL=xxx`, or by using the CMake function `pico_set_double_implementation(<TARGET> xxx)`. Note that in the absence
|
||||
* of either, pico_double_pico is used by default.
|
||||
*
|
||||
* - ddiv_fast, sqrt_fast
|
||||
* \if rp2040_specific
|
||||
* On RP2040, `pico_double_pico` uses optimized hand coded implementations from the bootrom and the SDK for both
|
||||
* basic double-precision floating point operations and floating point math library functions. These implementations
|
||||
* are generally faster and smaller than those provided by the C compiler/library, though they don't support all the features of a fully compliant
|
||||
* floating point implementation; they are however usually fine for the majority of cases
|
||||
* \endif
|
||||
*
|
||||
* \if rp2350_specific
|
||||
* On RP2350, `pico_double_pico` uses RP2350 DCP instructions (double co-processor) to implement fast versions of the basic
|
||||
* arithmetic functions, and provides optimized M33 implementations of trigonometric and scientific functions.
|
||||
* These implementations are generally faster and smaller than those provided by the C compiler/library, though they don't support all the features of a fully compliant
|
||||
* floating point implementation; they are however usually fine for the majority of cases
|
||||
* \endif
|
||||
*
|
||||
* On Arm, (replacement) optimized implementations are provided for the following compiler built-ins
|
||||
* and math library functions when using `pico_double_pico`:
|
||||
*
|
||||
* - basic arithmetic:
|
||||
*
|
||||
* __aeabi_dadd, __aeabi_ddiv, __aeabi_dmul, __aeabi_drsub, __aeabi_dsub
|
||||
*
|
||||
* - comparison:
|
||||
*
|
||||
* __aeabi_cdcmpeq, __aeabi_cdrcmple, __aeabi_cdcmple, __aeabi_dcmpeq, __aeabi_dcmplt, __aeabi_dcmple, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmpun
|
||||
*
|
||||
* - (u)int32 <-> double:
|
||||
*
|
||||
* __aeabi_i2d, __aeabi_ui2d, __aeabi_d2iz, __aeabi_d2uiz
|
||||
*
|
||||
* - (u)int64 <-> double:
|
||||
*
|
||||
* __aeabi_l2d, __aeabi_ul2d, __aeabi_d2lz, __aeabi_d2ulz
|
||||
*
|
||||
* - double -> float:
|
||||
*
|
||||
* __aeabi_d2f
|
||||
*
|
||||
* - basic trigonometric:
|
||||
*
|
||||
* sqrt, cos, sin, tan, atan2, exp, log
|
||||
*
|
||||
* - trigonometric and scientific
|
||||
*
|
||||
* ldexp, copysign, trunc, floor, ceil, round, asin, acos, atan, sinh, cosh, tanh, asinh, acosh, atanh, exp2, log2, exp10, log10, pow, hypot, cbrt, fmod, drem, remainder, remquo, expm1, log1p, fma
|
||||
*
|
||||
* - GNU extensions:
|
||||
*
|
||||
* powint, sincos
|
||||
*
|
||||
* On Arm, the following additional optimized functions are also provided when using `pico_double_pico`:
|
||||
*
|
||||
* - Conversions to/from integer types:
|
||||
*
|
||||
* - (u)int -> double (round to nearest):
|
||||
*
|
||||
* int2double, uint2double, int642double, uint642double
|
||||
*
|
||||
* - double -> (u)int (round towards zero):
|
||||
*
|
||||
* double2int_z, double2uint_z, double2int64_z, double2uint64_z
|
||||
*
|
||||
* - double -> (u)int (round towards -infinity):
|
||||
*
|
||||
* double2int, double2uint, double2int64, double2uint64
|
||||
*
|
||||
* - Conversions to/from fixed point integers:
|
||||
*
|
||||
* - (u)fix -> double (round to nearest):
|
||||
*
|
||||
* fix2double, ufix2double, fix642double, ufix642double
|
||||
*
|
||||
* - double -> (u)fix (round towards zero):
|
||||
*
|
||||
* double2fix_z, double2ufix_z, double2fix64_z, double2ufix64_z
|
||||
*
|
||||
* - double -> (u)fix (round towards -infinity):
|
||||
*
|
||||
* double2fix, double2ufix, double2fix64, double2ufix64
|
||||
*
|
||||
* - Even faster versions of divide and square-root functions that do not round correctly:
|
||||
*
|
||||
* ddiv_fast, sqrt_fast (these do not round correctly)
|
||||
*
|
||||
* - Faster unfused multiply and accumulate:
|
||||
*
|
||||
* mla (fast fma)
|
||||
*
|
||||
* \if rp2350_specific
|
||||
* On RISC-V there is no custom double-precision floating point support, so `pico_double_pico` is equivalent to `pico_double_compiler`
|
||||
* \endif
|
||||
*/
|
||||
#if !defined(__riscv) || PICO_COMBINED_DOCS
|
||||
|
||||
#if PICO_COMBINED_DOCS || !LIB_PICO_DOUBLE_COMPILER
|
||||
double int2double(int32_t i);
|
||||
double uint2double(uint32_t u);
|
||||
double uint2double(uint32_t i);
|
||||
double int642double(int64_t i);
|
||||
double uint642double(uint64_t u);
|
||||
double uint642double(uint64_t i);
|
||||
double fix2double(int32_t m, int e);
|
||||
double ufix2double(uint32_t m, int e);
|
||||
double fix642double(int64_t m, int e);
|
||||
double ufix642double(uint64_t m, int e);
|
||||
|
||||
// These methods round towards -Infinity.
|
||||
int32_t double2fix(double d, int e);
|
||||
uint32_t double2ufix(double d, int e);
|
||||
int64_t double2fix64(double d, int e);
|
||||
uint64_t double2ufix64(double d, int e);
|
||||
int32_t double2int(double d);
|
||||
uint32_t double2uint(double d);
|
||||
int64_t double2int64(double d);
|
||||
uint64_t double2uint64(double d);
|
||||
// These methods round towards 0, which IS the C way.
//
// The double2(u)int*_z functions convert to a 32- or 64-bit integer.
// The double2(u)fix*_z functions take an extra exponent adjustment `e`, which is
// added to the exponent of `f` before the conversion (i.e. the result has `e`
// fractional bits).
//
// The unsigned variants return unsigned types, consistent with double2uint(),
// double2uint64() and the double2ufix*_z() functions.
int32_t double2int_z(double f);
int64_t double2int64_z(double f);
uint32_t double2uint_z(double f);
uint64_t double2uint64_z(double f);
int32_t double2fix_z(double f, int e);
uint32_t double2ufix_z(double f, int e);
int64_t double2fix64_z(double f, int e);
uint64_t double2ufix64_z(double f, int e);
|
||||
|
||||
// These methods round towards 0.
|
||||
int32_t double2int_z(double d);
|
||||
int64_t double2int64_z(double d);
|
||||
// These methods round towards -Infinity - which IS NOT the C way for negative numbers;
|
||||
// as such the naming is not ideal, however is kept for backwards compatibility
|
||||
int32_t double2int(double f);
|
||||
uint32_t double2uint(double f);
|
||||
int64_t double2int64(double f);
|
||||
uint64_t double2uint64(double f);
|
||||
int32_t double2fix(double f, int e);
|
||||
uint32_t double2ufix(double f, int e);
|
||||
int64_t double2fix64(double f, int e);
|
||||
uint64_t double2ufix64(double f, int e);
|
||||
|
||||
#endif
|
||||
|
||||
double exp10(double x);
|
||||
void sincos(double x, double *sinx, double *cosx);
|
||||
@@ -67,8 +170,24 @@ double powint(double x, int y);
|
||||
|
||||
#if !PICO_RP2040 || PICO_COMBINED_DOCS
|
||||
double ddiv_fast(double n, double d);
|
||||
double sqrt_fast(double d);
|
||||
double mla(double x, double y, double z); // note this is not fused
|
||||
double sqrt_fast(double f);
|
||||
double fma_fast(double x, double y, double z); // this is not fused
|
||||
double mla(double x, double y, double z); // another name for fma_fast
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if LIB_PICO_DOUBLE_COMPILER || defined(__riscv)
|
||||
// When using the compiler's own floating point support we provide as many of the
// conversion functions as we trivially can; in the double case they are not optimal.

// Integer -> double conversions (plain C casts).
static inline double int2double(int32_t i) { return (double)i; }
static inline double uint2double(uint32_t i) { return (double)i; }
static inline double int642double(int64_t i) { return (double)i; }
static inline double uint642double(uint64_t i) { return (double)i; }

// Double -> integer conversions, rounding towards zero (plain C casts).
// The unsigned variants return unsigned types so that values above
// INT32_MAX / INT64_MAX are not reinterpreted as negative numbers.
static inline int32_t double2int_z(double d) { return (int32_t)d; }
static inline int64_t double2int64_z(double d) { return (int64_t)d; }
static inline uint32_t double2uint_z(double d) { return (uint32_t)d; }
static inline uint64_t double2uint64_z(double d) { return (uint64_t)d; }
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -76,4 +195,3 @@ double mla(double x, double y, double z); // note this is not fused
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -2,13 +2,16 @@ load("//bazel:defs.bzl", "compatible_with_rp2", "incompatible_with_config")
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
_WRAP_FLOAT_AEABI_FLAGS = [
|
||||
_WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS = [
|
||||
"-Wl,--wrap=__aeabi_fadd",
|
||||
"-Wl,--wrap=__aeabi_fdiv",
|
||||
"-Wl,--wrap=__aeabi_fmul",
|
||||
"-Wl,--wrap=__aeabi_frsub",
|
||||
"-Wl,--wrap=__aeabi_fsub",
|
||||
"-Wl,--wrap=__aeabi_cfcmpeq",
|
||||
]
|
||||
|
||||
_WRAP_FLOAT_AEABI_CMP_FLAGS = [
|
||||
"-Wl,--wrap=__aeabi_cfrcmple",
|
||||
"-Wl,--wrap=__aeabi_cfcmple",
|
||||
"-Wl,--wrap=__aeabi_fcmpeq",
|
||||
@@ -17,15 +20,27 @@ _WRAP_FLOAT_AEABI_FLAGS = [
|
||||
"-Wl,--wrap=__aeabi_fcmpge",
|
||||
"-Wl,--wrap=__aeabi_fcmpgt",
|
||||
"-Wl,--wrap=__aeabi_fcmpun",
|
||||
]
|
||||
|
||||
# Conversions between float and 32-bit integers (both directions).
# Grouped by integer width, matching the NO_AEABI_CONV_32 group in the CMake build.
_WRAP_FLOAT_AEABI_CONV_32_FLAGS = [
    "-Wl,--wrap=__aeabi_i2f",
    "-Wl,--wrap=__aeabi_ui2f",
    "-Wl,--wrap=__aeabi_f2iz",
    "-Wl,--wrap=__aeabi_f2uiz",
]

# Conversions between float and 64-bit integers (both directions).
# Grouped by integer width, matching the NO_AEABI_CONV_64 group in the CMake build.
# Implementations that list only this group wrap just the 64-bit helpers.
_WRAP_FLOAT_AEABI_CONV_64_FLAGS = [
    "-Wl,--wrap=__aeabi_l2f",
    "-Wl,--wrap=__aeabi_ul2f",
    "-Wl,--wrap=__aeabi_f2lz",
    "-Wl,--wrap=__aeabi_f2ulz",
]
|
||||
|
||||
_WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS = [
|
||||
"-Wl,--wrap=__aeabi_f2d",
|
||||
]
|
||||
|
||||
_WRAP_FLOAT_SQRTF_FLAGS = [
|
||||
"-Wl,--wrap=sqrtf",
|
||||
]
|
||||
|
||||
@@ -36,13 +51,16 @@ _WRAP_FLOAT_SCI_FLAGS = [
|
||||
"-Wl,--wrap=atan2f",
|
||||
"-Wl,--wrap=expf",
|
||||
"-Wl,--wrap=logf",
|
||||
"-Wl,--wrap=sincosf", # gnu
|
||||
]
|
||||
|
||||
_WRAP_FLOAT_SCI_EXTRA_FLAGS = [
|
||||
"-Wl,--wrap=ldexpf",
|
||||
"-Wl,--wrap=copysignf",
|
||||
"-Wl,--wrap=truncf",
|
||||
"-Wl,--wrap=floorf",
|
||||
"-Wl,--wrap=ceilf",
|
||||
"-Wl,--wrap=roundf",
|
||||
"-Wl,--wrap=sincosf", # gnu
|
||||
"-Wl,--wrap=asinf",
|
||||
"-Wl,--wrap=acosf",
|
||||
"-Wl,--wrap=atanf",
|
||||
@@ -114,30 +132,31 @@ _PICO_FLOAT_IMPLS = [
|
||||
],
|
||||
"compatibility": incompatible_with_config("@platforms//cpu:riscv32") + ["//bazel/constraint:rp2040"],
|
||||
"extra_deps": [],
|
||||
"linkopts": _WRAP_FLOAT_AEABI_FLAGS + _WRAP_FLOAT_SCI_FLAGS,
|
||||
"linkopts": _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS + _WRAP_FLOAT_AEABI_CMP_FLAGS + _WRAP_FLOAT_AEABI_CONV_32_FLAGS + _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS + _WRAP_FLOAT_SQRTF_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS,
|
||||
},
|
||||
{
|
||||
"name": "dcp",
|
||||
"srcs": [
|
||||
"float_aeabi_dcp.S",
|
||||
"float_conv_m33.S",
|
||||
"float_common_m33.S",
|
||||
"float_math.c",
|
||||
"float_sci_m33.S",
|
||||
],
|
||||
"compatibility": compatible_with_rp2() + incompatible_with_config("@platforms//cpu:riscv32") + incompatible_with_config("//bazel/constraint:rp2040"),
|
||||
"extra_deps": ["//src/rp2_common/hardware_dcp"],
|
||||
"linkopts": _WRAP_FLOAT_SCI_FLAGS,
|
||||
"linkopts": _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS + _WRAP_FLOAT_AEABI_CMP_FLAGS + _WRAP_FLOAT_AEABI_CONV_32_FLAGS + _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS + _WRAP_FLOAT_SQRTF_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS,
|
||||
},
|
||||
{
|
||||
"name": "vfp",
|
||||
"srcs": [
|
||||
"float_conv32_vfp.S",
|
||||
"float_sci_m33_vfp.S",
|
||||
"float_conv_m33.S",
|
||||
"float_common_m33.S",
|
||||
"float_math.c",
|
||||
],
|
||||
"compatibility": compatible_with_rp2() + incompatible_with_config("@platforms//cpu:riscv32") + incompatible_with_config("//bazel/constraint:rp2040"),
|
||||
"extra_deps": ["//src/rp2_common/hardware_dcp"],
|
||||
"linkopts": _WRAP_FLOAT_SCI_FLAGS,
|
||||
"linkopts": _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS,
|
||||
},
|
||||
{
|
||||
"name": "single_hazard3",
|
||||
@@ -146,7 +165,7 @@ _PICO_FLOAT_IMPLS = [
|
||||
],
|
||||
"compatibility": compatible_with_rp2() + ["@platforms//cpu:riscv32"],
|
||||
"extra_deps": ["//src/rp2_common/hardware_hazard3"],
|
||||
"linkopts": _WRAP_FLOAT_SCI_FLAGS,
|
||||
"linkopts": _WRAP_FLOAT_SCI_EXTRA_FLAGS,
|
||||
},
|
||||
]
|
||||
|
||||
@@ -184,7 +203,7 @@ cc_library(
|
||||
hdrs = ["include/pico/float.h"],
|
||||
defines = ["LIB_PICO_FLOAT_PICO=0"],
|
||||
includes = ["include"],
|
||||
linkopts = _WRAP_FLOAT_AEABI_FLAGS + _WRAP_FLOAT_SCI_FLAGS,
|
||||
linkopts = _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS + _WRAP_FLOAT_AEABI_CMP_FLAGS + _WRAP_FLOAT_AEABI_CONV_32_FLAGS + _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS + _WRAP_FLOAT_SQRTF_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS,
|
||||
target_compatible_with = compatible_with_rp2(),
|
||||
visibility = ["//visibility:private"],
|
||||
deps = [
|
||||
|
@@ -18,13 +18,15 @@
|
||||
$<IF:$<BOOL:$<TARGET_PROPERTY:PICO_TARGET_FLOAT_IMPL>>,$<TARGET_PROPERTY:PICO_TARGET_FLOAT_IMPL>,${PICO_DEFAULT_FLOAT_IMPL}>)
|
||||
|
||||
function(wrap_float_functions TARGET)
|
||||
cmake_parse_arguments(WRAP_FLOAT "NO_WRAP_AEABI;NO_WRAP_SCI" "" "" ${ARGN} )
|
||||
if (NOT WRAP_FLOAT_NO_WRAP_AEABI)
|
||||
cmake_parse_arguments(WRAP_FLOAT "NO_AEABI_ARITHMETIC;NO_AEABI_CMP;NO_AEABI_CONV_32;NO_AEABI_CONV_64;NO_AEABI_CONV_DOUBLE;NO_SQRTF;NO_SCI;NO_SCI_EXTRA" "" "" ${ARGN} )
|
||||
if (NOT WRAP_FLOAT_NO_AEABI_ARITHMETIC)
|
||||
pico_wrap_function(${TARGET} __aeabi_fadd)
|
||||
pico_wrap_function(${TARGET} __aeabi_fdiv)
|
||||
pico_wrap_function(${TARGET} __aeabi_fmul)
|
||||
pico_wrap_function(${TARGET} __aeabi_frsub)
|
||||
pico_wrap_function(${TARGET} __aeabi_fsub)
|
||||
endif()
|
||||
if (NOT WRAP_FLOAT_NO_AEABI_CMP)
|
||||
pico_wrap_function(${TARGET} __aeabi_cfcmpeq)
|
||||
pico_wrap_function(${TARGET} __aeabi_cfrcmple)
|
||||
pico_wrap_function(${TARGET} __aeabi_cfcmple)
|
||||
@@ -34,32 +36,42 @@
|
||||
pico_wrap_function(${TARGET} __aeabi_fcmpge)
|
||||
pico_wrap_function(${TARGET} __aeabi_fcmpgt)
|
||||
pico_wrap_function(${TARGET} __aeabi_fcmpun)
|
||||
endif()
|
||||
if (NOT WRAP_FLOAT_NO_AEABI_CONV_32)
|
||||
pico_wrap_function(${TARGET} __aeabi_i2f)
|
||||
pico_wrap_function(${TARGET} __aeabi_l2f)
|
||||
pico_wrap_function(${TARGET} __aeabi_ui2f)
|
||||
pico_wrap_function(${TARGET} __aeabi_ul2f)
|
||||
pico_wrap_function(${TARGET} __aeabi_f2iz)
|
||||
pico_wrap_function(${TARGET} __aeabi_f2lz)
|
||||
pico_wrap_function(${TARGET} __aeabi_f2uiz)
|
||||
endif()
|
||||
if (NOT WRAP_FLOAT_NO_AEABI_CONV_64)
|
||||
pico_wrap_function(${TARGET} __aeabi_l2f)
|
||||
pico_wrap_function(${TARGET} __aeabi_ul2f)
|
||||
pico_wrap_function(${TARGET} __aeabi_f2lz)
|
||||
pico_wrap_function(${TARGET} __aeabi_f2ulz)
|
||||
endif()
|
||||
if (NOT WRAP_FLOAT_NO_AEABI_CONV_DOUBLE)
|
||||
pico_wrap_function(${TARGET} __aeabi_f2d)
|
||||
endif()
|
||||
# separate as we have a direct DCP version
|
||||
if (NOT WRAP_FLOAT_NO_SQRTF)
|
||||
pico_wrap_function(${TARGET} sqrtf)
|
||||
endif()
|
||||
if (NOT WRAP_FLOAT_NO_WRAP_SCI)
|
||||
if (NOT WRAP_FLOAT_NO_SCI)
|
||||
pico_wrap_function(${TARGET} cosf)
|
||||
pico_wrap_function(${TARGET} sinf)
|
||||
pico_wrap_function(${TARGET} tanf)
|
||||
pico_wrap_function(${TARGET} atan2f)
|
||||
pico_wrap_function(${TARGET} expf)
|
||||
pico_wrap_function(${TARGET} logf)
|
||||
|
||||
pico_wrap_function(${TARGET} sincosf) # gnu
|
||||
endif()
|
||||
if (NOT WRAP_FLOAT_NO_SCI_EXTRA)
|
||||
pico_wrap_function(${TARGET} ldexpf)
|
||||
pico_wrap_function(${TARGET} copysignf)
|
||||
pico_wrap_function(${TARGET} truncf)
|
||||
pico_wrap_function(${TARGET} floorf)
|
||||
pico_wrap_function(${TARGET} ceilf)
|
||||
pico_wrap_function(${TARGET} roundf)
|
||||
pico_wrap_function(${TARGET} sincosf) # gnu
|
||||
pico_wrap_function(${TARGET} asinf)
|
||||
pico_wrap_function(${TARGET} acosf)
|
||||
pico_wrap_function(${TARGET} atanf)
|
||||
@@ -93,7 +105,9 @@
|
||||
)
|
||||
|
||||
target_link_libraries(pico_float_none INTERFACE pico_float_headers)
|
||||
wrap_float_functions(pico_float_none)
|
||||
wrap_float_functions(pico_float_none) # we wrap all functions
|
||||
# be explicit that there should be no floating point instructions
|
||||
target_compile_options(pico_float_none INTERFACE -msoft-float)
|
||||
|
||||
pico_add_library(pico_float_pico)
|
||||
if (PICO_RP2040)
|
||||
@@ -107,21 +121,52 @@
|
||||
target_link_libraries(pico_float_pico INTERFACE pico_bootrom pico_float_headers hardware_divider)
|
||||
elseif(NOT PICO_RISCV)
|
||||
pico_add_library(pico_float_pico_dcp)
|
||||
# todo what functions from float_math belong in each case; should some be left to GCC on RP2350?
|
||||
target_sources(pico_float_pico_dcp INTERFACE
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_math.c
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_aeabi_dcp.S
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_common_m33.S
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_sci_m33.S
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_conv_m33.S
|
||||
)
|
||||
|
||||
wrap_float_functions(pico_float_pico_dcp NO_WRAP_AEABI)
|
||||
# NOTE the main reason for using pico_float_pico_dcp is presumably that you
|
||||
# don't want to use VFP at all, so turn off compiler support, otherwise, it will inline usages
|
||||
target_compile_options(pico_float_pico_dcp INTERFACE -msoft-float)
|
||||
|
||||
wrap_float_functions(pico_float_pico_dcp
|
||||
# we wrap all functions as we don't want to use VFP (or compiler versions) at all
|
||||
#NO_AEABI_ARITHMETIC
|
||||
#NO_AEABI_CMP
|
||||
#NO_AEABI_CONV_32
|
||||
#NO_AEABI_CONV_64
|
||||
#NO_AEABI_CONV_DOUBLE
|
||||
#NO_SQRTF
|
||||
#NO_SCI
|
||||
#NO_SCI_EXTRA
|
||||
)
|
||||
|
||||
pico_add_library(pico_float_pico_vfp)
|
||||
target_sources(pico_float_pico_vfp INTERFACE
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_math.c
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_conv32_vfp.S
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_common_m33.S
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_sci_m33_vfp.S
|
||||
${CMAKE_CURRENT_LIST_DIR}/float_conv_m33.S
|
||||
)
|
||||
wrap_float_functions(pico_float_pico_vfp NO_WRAP_AEABI)
|
||||
wrap_float_functions(pico_float_pico_vfp
|
||||
# for these 3, arguably compiler is probably inlining anyway, but use the compiler's
|
||||
# version for explicit AEABI calls
|
||||
NO_AEABI_ARITHMETIC
|
||||
NO_AEABI_CMP
|
||||
NO_AEABI_CONV_32
|
||||
#NO_AEABI_CONV_64 # we have optimized M33 versions
|
||||
NO_AEABI_CONV_DOUBLE
|
||||
# we don't have an optimized vfp or m33 sqrtf available
|
||||
NO_SQRTF
|
||||
#NO_SCI # we have optimized VFP versions
|
||||
#NO_SCI_EXTRA # todo - are our versions better than what GCC provides?
|
||||
)
|
||||
|
||||
|
||||
target_link_libraries(pico_float_pico INTERFACE
|
||||
pico_float_pico_vfp)
|
||||
else()
|
||||
|
@@ -5,15 +5,17 @@
|
||||
*/
|
||||
|
||||
#include "pico/asm_helper.S"
|
||||
#if HAS_DOUBLE_COPROCESSOR
|
||||
|
||||
#if !HAS_DOUBLE_COPROCESSOR
|
||||
#error attempt to compile float_aeabi_dcp when there is no DCP
|
||||
#else
|
||||
|
||||
#include "hardware/dcp_instr.inc.S"
|
||||
#include "hardware/dcp_canned.inc.S"
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
// todo alignment
|
||||
//__pre_init __aeabi_float_init, 00020
|
||||
// factor out save/restore (there is a copy in double code)
|
||||
// todo factor out save/restore (there is a copy in double code)
|
||||
|
||||
.macro float_section name
|
||||
#if PICO_FLOAT_IN_RAM
|
||||
@@ -29,7 +31,7 @@ float_section WRAPPER_FUNC_NAME(\func)
|
||||
|
||||
// ============== STATE SAVE AND RESTORE ===============
|
||||
|
||||
.macro saving_func func
|
||||
.macro saving_func type func, opt_label1='-', opt_label2='-'
|
||||
// Note we are usually 32-bit aligned already at this point, as most of the
|
||||
// function bodies contain exactly two 16-bit instructions: bmi and bx lr.
|
||||
// We want the PCMP word-aligned.
|
||||
@@ -41,8 +43,14 @@ float_section WRAPPER_FUNC_NAME(\func)
|
||||
push {lr} // 16-bit instruction
|
||||
bl generic_save_state // 32-bit instruction
|
||||
b 1f // 16-bit instruction
|
||||
.ifnc \opt_label1,'-'
|
||||
regular_func \opt_label1
|
||||
.endif
|
||||
.ifnc \opt_label2,'-'
|
||||
regular_func \opt_label2
|
||||
.endif
|
||||
// This is the actual entry point:
|
||||
wrapper_func \func
|
||||
\type\()_func \func
|
||||
PCMP apsr_nzcv
|
||||
bmi 1b
|
||||
1:
|
||||
@@ -82,115 +90,208 @@ generic_restore_state:
|
||||
// ============== ARITHMETIC FUNCTIONS ===============
|
||||
|
||||
float_wrapper_section __aeabi_fadd
|
||||
saving_func __aeabi_fadd
|
||||
saving_func wrapper __aeabi_fadd
|
||||
dcp_fadd_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_fsub
|
||||
saving_func __aeabi_fsub
|
||||
saving_func wrapper __aeabi_fsub
|
||||
dcp_fsub_m r0,r0,r1
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_frsub
|
||||
saving_func __aeabi_frsub
|
||||
saving_func wrapper __aeabi_frsub
|
||||
dcp_fsub_m r0,r1,r0
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_fmul
|
||||
saving_func __aeabi_fmul
|
||||
saving_func wrapper __aeabi_fmul
|
||||
dcp_fmul_m r0,r0,r1,r0,r1
|
||||
saving_func_return
|
||||
|
||||
float_section fdiv_fast
|
||||
saving_func fdiv_fast
|
||||
saving_func regular fdiv_fast
|
||||
dcp_fdiv_fast_m r0,r0,r1,r0,r1,r2
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_fdiv
|
||||
saving_func __aeabi_fdiv
|
||||
saving_func wrapper __aeabi_fdiv
|
||||
@ with correct rounding
|
||||
dcp_fdiv_m r0,r0,r1,r0,r1,r2,r3
|
||||
saving_func_return
|
||||
|
||||
float_section sqrtf_fast
|
||||
saving_func sqrtf_fast
|
||||
saving_func regular sqrtf_fast
|
||||
dcp_fsqrt_fast_m r0,r0,r0,r1,r2,r3
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section sqrtf
|
||||
saving_func sqrtf
|
||||
saving_func wrapper sqrtf
|
||||
@ with correct rounding
|
||||
dcp_fsqrt_m r0,r0,r0,r1,r2,r3
|
||||
saving_func_return
|
||||
|
||||
// todo not a real thing
|
||||
float_wrapper_section __aeabi_fclassify
|
||||
saving_func __aeabi_fclassify
|
||||
float_section fclassify
|
||||
saving_func regular fclassify
|
||||
dcp_fclassify_m apsr_nzcv,r0
|
||||
saving_func_return
|
||||
|
||||
// ============== CONVERSION FUNCTIONS ===============
|
||||
|
||||
float_wrapper_section __aeabi_f2d
|
||||
saving_func __aeabi_f2d
|
||||
saving_func wrapper __aeabi_f2d float2double
|
||||
dcp_float2double_m r0,r1,r0
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_i2f
|
||||
saving_func __aeabi_i2f
|
||||
saving_func wrapper __aeabi_i2f int2float
|
||||
@ with rounding
|
||||
dcp_int2float_m r0,r0
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_ui2f
|
||||
saving_func __aeabi_ui2f
|
||||
saving_func wrapper __aeabi_ui2f uint2float
|
||||
@ with rounding
|
||||
dcp_uint2float_m r0,r0
|
||||
saving_func_return
|
||||
|
||||
float_section float2fix_z
|
||||
regular_func float2fix_z
|
||||
ubfx r2, r0, #23, #8
|
||||
cbz r2, 2f // input is zero or denormal
|
||||
cmp r2, #0xff
|
||||
beq 3f // input infinite or nan
|
||||
adds r2, r1
|
||||
ble 2f // modified input is denormal so zero
|
||||
cmp r2, #0xff
|
||||
beq 3f // modified input is infinite
|
||||
1:
|
||||
bfi r0, r2, #23, #8
|
||||
b float2int_z_entry
|
||||
2:
|
||||
movs r0, #0
|
||||
bx lr
|
||||
3:
|
||||
mvn r1, #0x80000000
|
||||
add r0, r1, r0, lsr#31 @ so -Inf → 0x80000000, +Inf → 0x7fffffff
|
||||
bx lr
|
||||
|
||||
float_wrapper_section __aeabi_f2iz
|
||||
saving_func __aeabi_f2iz
|
||||
saving_func wrapper __aeabi_f2iz float2int_z
|
||||
@ with truncation towards 0
|
||||
float2int_z_entry:
|
||||
dcp_float2int_m r0,r0
|
||||
saving_func_return
|
||||
|
||||
float_section __aeabi_f2ufix
|
||||
regular_func float2ufix
|
||||
regular_func float2ufix_z
|
||||
ubfx r2, r0, #23, #8
|
||||
cbz r2, 2f // input is zero or denormal
|
||||
cmp r2, #0xff
|
||||
beq 3f // input infinite or nan
|
||||
adds r2, r1
|
||||
ble 2f // modified input is denormal so zero
|
||||
cmp r2, #0xff
|
||||
beq 3f // modified input is infinite
|
||||
1:
|
||||
bfi r0, r2, #23, #8
|
||||
b float2uint_z_entry
|
||||
2:
|
||||
movs r0, #0
|
||||
bx lr
|
||||
3:
|
||||
mvn r0, r0, asr #31
|
||||
bx lr
|
||||
|
||||
float_wrapper_section __aeabi_f2uiz
|
||||
saving_func __aeabi_f2uiz
|
||||
saving_func wrapper __aeabi_f2uiz float2uint_z float2uint
|
||||
@ with truncation towards 0
|
||||
float2uint_z_entry:
|
||||
dcp_float2uint_m r0,r0
|
||||
saving_func_return
|
||||
|
||||
// todo not a real thing
|
||||
float_section conv_f2fix
|
||||
saving_func regular float2fix
|
||||
ubfx r2, r0, #23, #8
|
||||
cbz r2, 2f // input is zero or denormal
|
||||
cmp r2, #0xff
|
||||
beq 3f // input infinite or nan
|
||||
adds r2, r1
|
||||
ble 2f // modified input is denormal so zero
|
||||
cmp r2, #0xff
|
||||
beq 3f // modified input is infinite
|
||||
1:
|
||||
bfi r0, r2, #23, #8
|
||||
b float2int_entry
|
||||
2:
|
||||
movs r0, #0
|
||||
bx lr
|
||||
3:
|
||||
mvn r1, #0x80000000
|
||||
add r0, r1, r0, lsr#31 @ so -Inf → 0x80000000, +Inf → 0x7fffffff
|
||||
bx lr
|
||||
|
||||
float_section float2int
|
||||
// (not a real thing - kept because we use wrapper in saving_func)
|
||||
saving_func regular float2int
|
||||
float2int_entry:
|
||||
lsls r1, r0, #1
|
||||
// r0 = abs(zero) => r1 = 0x00000000
|
||||
// r0 = abs(denormal) => r1 = 0x00xxxxxx
|
||||
// r0 = abs(1.0f) => r1 = 0x7f000000
|
||||
// r0 = abs(inf/nan) => r1 = 0xffxxxxxx
|
||||
bls float2int_z_entry // input positive or zero or -zero are ok for int_z
|
||||
lsrs r1, #24
|
||||
beq float2int_z_entry // input denormal is flushed to zero anyway
|
||||
subs r1, #0x7f
|
||||
bcc 1f // input < 1.0f means we need to subtract 1 after conversion
|
||||
// mask off all but fractional bits
|
||||
lsls r2, r0, r1
|
||||
lsls r2, #9
|
||||
beq float2int_z_entry // input is integer
|
||||
1:
|
||||
WXFC r0, r0
|
||||
ADD0
|
||||
ADD1
|
||||
NTDC
|
||||
RDIC r0
|
||||
subs r0, #1
|
||||
saving_func_return
|
||||
|
||||
#if 0 // not sure these are super useful; if they are we should give them names
|
||||
float_wrapper_section __aeabi_f2i_r
|
||||
saving_func __aeabi_f2i_r
|
||||
// (not a real thing - kept because we use wrapper in saving_func)
|
||||
saving_func wrapper __aeabi_f2i_r
|
||||
@ with rounding
|
||||
dcp_float2int_r_m r0,r0
|
||||
saving_func_return
|
||||
|
||||
// todo not a real thing
|
||||
float_wrapper_section __aeabi_f2ui_r
|
||||
saving_func __aeabi_f2ui_r
|
||||
// (not a real thing - kept because we use wrapper in saving_func)
|
||||
saving_func wrapper __aeabi_f2ui_r
|
||||
@ with rounding
|
||||
dcp_float2uint_r_m r0,r0
|
||||
saving_func_return
|
||||
#endif
|
||||
|
||||
// ============== COMPARISON FUNCTIONS ===============
|
||||
|
||||
float_wrapper_section __aeabi_fcmpun
|
||||
saving_func __aeabi_fcmpun
|
||||
saving_func wrapper __aeabi_fcmpun
|
||||
dcp_fcmp_m r0,r0,r1
|
||||
// extract unordered bit
|
||||
ubfx r0, r0, #28, #1
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_fcmp
|
||||
saving_func __aeabi_cfrcmple
|
||||
saving_func wrapper __aeabi_cfrcmple
|
||||
dcp_fcmp_m apsr_nzcv,r1,r0 // with arguments reversed
|
||||
bvs cmp_nan
|
||||
saving_func_return
|
||||
|
||||
// these next two can be the same function in the absence of exceptions
|
||||
saving_func __aeabi_cfcmple
|
||||
saving_func wrapper __aeabi_cfcmple
|
||||
dcp_fcmp_m apsr_nzcv,r0,r1
|
||||
bvs cmp_nan
|
||||
saving_func_return
|
||||
@@ -198,7 +299,7 @@ saving_func __aeabi_cfcmple
|
||||
// It is not clear from the ABI documentation whether cfcmpeq must set the C flag
|
||||
// in the same way as cfcmple. If not, we could save the "bvs" below; but we
|
||||
// err on the side of caution.
|
||||
saving_func __aeabi_cfcmpeq
|
||||
saving_func wrapper __aeabi_cfcmpeq
|
||||
dcp_fcmp_m apsr_nzcv,r0,r1
|
||||
bvs cmp_nan
|
||||
saving_func_return
|
||||
@@ -212,14 +313,14 @@ cmp_nan:
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_fcmpeq
|
||||
saving_func __aeabi_fcmpeq
|
||||
saving_func wrapper __aeabi_fcmpeq
|
||||
dcp_fcmp_m r0,r0,r1
|
||||
// extract Z
|
||||
ubfx r0, r0, #30, #1
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_fcmplt
|
||||
saving_func __aeabi_fcmplt
|
||||
saving_func wrapper __aeabi_fcmplt
|
||||
dcp_fcmp_m apsr_nzcv,r1,r0
|
||||
ite hi
|
||||
movhi r0,#1
|
||||
@@ -227,7 +328,7 @@ saving_func __aeabi_fcmplt
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_fcmple
|
||||
saving_func __aeabi_fcmple
|
||||
saving_func wrapper __aeabi_fcmple
|
||||
dcp_fcmp_m apsr_nzcv,r1,r0
|
||||
ite hs
|
||||
movhs r0,#1
|
||||
@@ -235,7 +336,7 @@ saving_func __aeabi_fcmple
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_fcmpge
|
||||
saving_func __aeabi_fcmpge
|
||||
saving_func wrapper __aeabi_fcmpge
|
||||
dcp_fcmp_m apsr_nzcv,r0,r1
|
||||
ite hs
|
||||
movhs r0,#1
|
||||
@@ -243,7 +344,7 @@ saving_func __aeabi_fcmpge
|
||||
saving_func_return
|
||||
|
||||
float_wrapper_section __aeabi_fcmpgt
|
||||
saving_func __aeabi_fcmpgt
|
||||
saving_func wrapper __aeabi_fcmpgt
|
||||
dcp_fcmp_m apsr_nzcv,r0,r1
|
||||
ite hi
|
||||
movhi r0,#1
|
||||
|
@@ -471,17 +471,36 @@ float_section float2int
|
||||
regular_func float2int
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim
|
||||
|
||||
float_section float2fix_z
|
||||
regular_func float2fix_z
|
||||
cmn r0, r0
|
||||
bcc float2fix
|
||||
push {lr}
|
||||
lsls r0, #1
|
||||
lsrs r0, #1
|
||||
bl float2ufix_z
|
||||
cmp r0, #0
|
||||
bmi 1f
|
||||
negs r0, r0
|
||||
pop {pc}
|
||||
1:
|
||||
movs r0, #128
|
||||
lsls r0, #24
|
||||
pop {pc}
|
||||
|
||||
float_section float2fix
|
||||
regular_func float2fix
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim
|
||||
|
||||
float_section float2ufix
|
||||
regular_func float2ufix
|
||||
regular_func float2ufix_z
|
||||
table_tail_call SF_TABLE_FLOAT2UFIX
|
||||
|
||||
// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3]
|
||||
float_wrapper_section __aeabi_f2uiz
|
||||
wrapper_func __aeabi_f2uiz
|
||||
regular_func float2uint
|
||||
regular_func float2uint_z
|
||||
table_tail_call SF_TABLE_FLOAT2UINT
|
||||
|
||||
@@ -530,10 +549,11 @@ wrapper_func __aeabi_f2lz
|
||||
regular_func float2int64_z
|
||||
cmn r0, r0
|
||||
bcc float2int64
|
||||
movs r1, #0
|
||||
float2fix64_z_neg:
|
||||
push {lr}
|
||||
lsls r0, #1
|
||||
lsrs r0, #1
|
||||
movs r1, #0
|
||||
bl float2ufix64
|
||||
cmp r1, #0
|
||||
bmi 1f
|
||||
@@ -553,17 +573,24 @@ regular_func float2int64
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim
|
||||
|
||||
float_section float2fix64
|
||||
regular_func float2fix64_z
|
||||
cmn r0, r0
|
||||
bcs float2fix64_z_neg
|
||||
// fall thru
|
||||
|
||||
regular_func float2fix64
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim
|
||||
|
||||
// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3]
|
||||
float_wrapper_section __aeabi_f2ulz
|
||||
wrapper_func __aeabi_f2ulz
|
||||
regular_func float2uint64
|
||||
regular_func float2uint64_z
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim
|
||||
|
||||
float_section float2ufix64
|
||||
regular_func float2ufix64
|
||||
regular_func float2ufix64_z
|
||||
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim
|
||||
|
||||
float_wrapper_section __aeabi_f2d
|
||||
|
@@ -241,7 +241,52 @@ regular_func ufix642float
|
||||
bxlo r14
|
||||
b 3b
|
||||
|
||||
float_wrapper_section conv_ftoi64
|
||||
float_section conv_ftoi64
|
||||
regular_func float2int64
|
||||
lsls r1, r0, #1
|
||||
// r0 = abs(zero) => r1 = 0x00000000
|
||||
// r0 = abs(denormal) => r1 = 0x00xxxxxx
|
||||
// r0 = abs(1.0f) => r1 = 0x7f000000
|
||||
// r0 = abs(inf/nan) => r1 = 0xffxxxxxx
|
||||
bls float2int64_z // positive or zero or -zero are ok for int64_z
|
||||
lsrs r1, #24
|
||||
subs r1, #0x7f
|
||||
bcc 1f // <1 means subtract 1
|
||||
// mask off all but fractional bits
|
||||
lsls r2, r0, r1
|
||||
lsls r2, #9
|
||||
beq float2int64_z // integer
|
||||
1:
|
||||
push {lr}
|
||||
bl float2int64_z
|
||||
subs r0, #1
|
||||
sbcs r1, r1, #0
|
||||
pop {pc}
|
||||
|
||||
float_section conv_ftof64
|
||||
regular_func float2fix64
|
||||
lsls r2, r0, #1
|
||||
// r0 = abs(zero) => r2 = 0x00000000
|
||||
// r0 = abs(denormal) => r2 = 0x00xxxxxx
|
||||
// r0 = abs(1.0f) => r2 = 0x7f000000
|
||||
// r0 = abs(inf/nan) => r2 = 0xffxxxxxx
|
||||
bls float2fix64_z // positive or zero or -zero are ok for fix64_z
|
||||
lsrs r2, #24
|
||||
rsbs r3, r1, #0x7f
|
||||
subs r2, r3
|
||||
bcc 1f // <1 means subtract 1
|
||||
// mask off all but fractional bits
|
||||
lsls r2, r0, r2
|
||||
lsls r2, #9
|
||||
beq float2fix64_z // integer
|
||||
1:
|
||||
push {lr}
|
||||
bl float2fix64_z
|
||||
subs r0, #1
|
||||
sbcs r1, r1, #0
|
||||
pop {pc}
|
||||
|
||||
float_wrapper_section conv_ftoi64z
|
||||
|
||||
@ convert float to signed int64, rounding towards 0, clamping
|
||||
wrapper_func __aeabi_f2lz
|
||||
@@ -318,7 +363,7 @@ regular_func float2uint64_z
|
||||
movs r1,#0 @ fall through
|
||||
@ convert float in r0 to unsigned fixed point in r0:r1, clamping
|
||||
regular_func float2ufix64
|
||||
//regular_func float2ufix64_z
|
||||
regular_func float2ufix64_z
|
||||
subs r1,#0x96 @ remove exponent bias, compensate for mantissa length
|
||||
asrs r2,r0,#23 @ sign and exponent
|
||||
sub r3,r2,#1
|
106
src/rp2_common/pico_float/float_conv32_vfp.S
Normal file
106
src/rp2_common/pico_float/float_conv32_vfp.S
Normal file
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#if !PICO_RP2040
|
||||
#include "pico/asm_helper.S"
|
||||
|
||||
pico_default_asm_setup
|
||||
|
||||
.macro float_section name
|
||||
#if PICO_FLOAT_IN_RAM
|
||||
.section RAM_SECTION_NAME(\name), "ax"
|
||||
#else
|
||||
.section SECTION_NAME(\name), "ax"
|
||||
#endif
|
||||
.endm
|
||||
|
||||
float_section int2float
|
||||
regular_func int2float
|
||||
vmov s15, r0
|
||||
vcvt.f32.s32 s15, s15
|
||||
vmov r0, s15
|
||||
bx lr
|
||||
|
||||
float_section uint2float
|
||||
regular_func uint2float
|
||||
vmov s15, r0
|
||||
vcvt.f32.u32 s15, s15
|
||||
vmov r0, s15
|
||||
bx lr
|
||||
|
||||
float_section float2int
|
||||
regular_func float2int
|
||||
vmov s15, r0
|
||||
vcvtm.s32.f32 s15, s15
|
||||
vmov r0, s15
|
||||
bx lr
|
||||
|
||||
float_section float2int_z
|
||||
regular_func float2int_z
|
||||
vmov s15, r0
|
||||
vcvt.s32.f32 s15, s15
|
||||
vmov r0, s15
|
||||
bx lr
|
||||
|
||||
float_section float2uint
|
||||
regular_func float2uint
|
||||
regular_func float2uint_z
|
||||
vmov s15, r0
|
||||
vcvt.u32.f32 s15, s15
|
||||
vmov r0, s15
|
||||
bx lr
|
||||
|
||||
float_section float2fix_z
|
||||
regular_func float2fix_z
|
||||
ubfx r2, r0, #23, #8
|
||||
adds r2, r1
|
||||
asrs r3, r2, #8
|
||||
beq 1f
|
||||
ite pl
|
||||
movpl r2, #0xff
|
||||
movmi r2, #0
|
||||
1:
|
||||
bfi r0, r2, #23, #8
|
||||
b float2int_z
|
||||
|
||||
float_section float2fix
|
||||
regular_func float2fix
|
||||
lsls r2, r0, #1
|
||||
// r0 = abs(zero) => r2 = 0x00000000
|
||||
// r0 = abs(denormal) => r2 = 0x00xxxxxx
|
||||
// r0 = abs(1.0f) => r2 = 0x7f000000
|
||||
// r0 = abs(inf/nan) => r2 = 0xffxxxxxx
|
||||
bls float2fix_z // input positive or zero or -zero are ok for fix_z
|
||||
lsrs r2, #24
|
||||
beq float2fix_z // input denormal will be flushed to zero
|
||||
rsbs r3, r1, #0x7f
|
||||
subs r2, r3
|
||||
bcc 1f // input < 1.0f means we need to subtract 1
|
||||
// mask off all but fractional bits
|
||||
lsls r2, r0, r2
|
||||
lsls r2, #9
|
||||
beq float2fix_z // input is integer
|
||||
1:
|
||||
push {lr}
|
||||
bl float2fix_z
|
||||
subs r0, #1
|
||||
sbcs r1, r1, #0
|
||||
pop {pc}
|
||||
|
||||
float_section float2ufix
|
||||
regular_func float2ufix
|
||||
regular_func float2ufix_z
|
||||
ubfx r2, r0, #23, #8
|
||||
adds r2, r1
|
||||
asrs r3, r2, #8
|
||||
beq 1f
|
||||
ite pl
|
||||
movpl r2, #0xff
|
||||
movmi r2, #0
|
||||
1:
|
||||
bfi r0, r2, #23, #8
|
||||
b float2uint_z
|
||||
#endif
|
@@ -21,68 +21,296 @@ extern "C" {
|
||||
*
|
||||
* \brief Optimized single-precision floating point functions
|
||||
*
|
||||
* (Replacement) optimized implementations are provided for the following compiler built-ins
|
||||
* and math library functions on Arm:
|
||||
* An application can take control of the floating point routines used in the application over and above what is provided by the compiler,
|
||||
* by depending on the pico_float library. A user might want to do this:
|
||||
*
|
||||
* - __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub, __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun, __aeabi_i2f, __aeabi_l2f, __aeabi_ui2f, __aeabi_ul2f, __aeabi_f2iz, __aeabi_f2lz, __aeabi_f2uiz, __aeabi_f2ulz, __aeabi_f2d, sqrtf, cosf, sinf, tanf, atan2f, expf, logf
|
||||
* - ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf
|
||||
* - powintf, sincosf (GNU extensions)
|
||||
* 1. To use optimized software implementations provided by the RP2-series device's bootrom or the SDK
|
||||
* 2. To use optimized combined software/hardware implementations utilizing custom RP2-series hardware for acceleration
|
||||
* 3. To control the amount of C compiler/library code bloat
|
||||
* 4. To make sure no floating point is called at all
|
||||
*
|
||||
* The following additional optimized functions are also provided:
|
||||
* The pico_float library comes in three main flavors:
|
||||
*
|
||||
* - int2float, uint2float, int642float, uint642float, fix2float, ufix2float, fix642float, ufix642float
|
||||
* - float2fix, float2ufix, float2fix64, float2ufix64, float2int, float2uint, float2int64, float2uint64, float2int_z, float2int64_z, float2uint_z, float2uint64_z
|
||||
* - exp10f, sincosf, powintf
|
||||
* 1. `pico_float_none` - all floating point operations cause a \ref panic - no single-precision floating point code is included
|
||||
* 2. `pico_float_compiler` - no custom functions are provided; all single-precision floating point is handled by the C compiler/library
|
||||
* 3. `pico_float_pico` - the smallest and fastest available for the platform, along with additional functionality (e.g. fixed point conversions) which is detailed below
|
||||
*
|
||||
* On RP2350 (Arm) the following additional functions are available; the _fast methods are faster but do not round correctly
|
||||
* The user can control which version they want (e.g. **pico_float_xxx**) by either setting the CMake global variable
|
||||
* `PICO_DEFAULT_FLOAT_IMPL=xxx`, or by using the CMake function `pico_set_float_implementation(<TARGET> xxx)`. Note that in the absence
|
||||
* of either, pico_float_pico is used by default.
|
||||
*
|
||||
* - float2fix64_z, fdiv_fast, fsqrt_fast,
|
||||
* \if rp2040_specific
|
||||
* On RP2040, `pico_float_pico` uses optimized hand coded implementations from the bootrom and the SDK for both
|
||||
* basic single-precision floating point operations and floating point math library functions. These implementations
|
||||
* are generally faster and smaller than those provided by the C compiler/library, though they don't support all the features of a fully compliant
|
||||
* floating point implementation; they are however usually fine for the majority of cases
|
||||
* \endif
|
||||
*
|
||||
* On RP2350 RISC-V, only a small number of compiler runtime functions are overridden with faster implementations:
|
||||
* \if rp2350_specific
|
||||
* On Arm on RP2350, there are multiple options for `pico_float_pico`:
|
||||
*
|
||||
* - __addsf3, __subsf3, __mulsf3
|
||||
* 1. `pico_float_pico_vfp` - this library leaves basic C single-precision floating point operations to the compiler
|
||||
* which can use inlined VFP (Arm FPU) code. Custom optimized versions of trigonometric and scientific functions are provided.
|
||||
* No DCP (RP2350 Double co-processor) instructions are used.
|
||||
* 2. `pico_float_pico_dcp` - this library prevents the compiler injecting inlined VFP code, and also implements
|
||||
* all single-precision floating point operations in optimized DCP or M33 code. This option is not quite as fast
|
||||
* as pico_float_pico_vfp, however it allows floating point operations without enabling the floating point co-processor
|
||||
* on the CPU; this can be beneficial in certain circumstances, e.g. where reserving stack space in tasks or interrupts
|
||||
* for the floating point state is undesirable.
|
||||
*
|
||||
* Note: `pico_float_pico` is equivalent to `pico_float_pico_vfp` on RP2350, as this is the most sensible default
|
||||
* \endif
|
||||
*
|
||||
* On Arm, (replacement) optimized implementations are provided for the following compiler built-ins
|
||||
* and math library functions when using `_pico` variants of `pico_float`:
|
||||
*
|
||||
* - basic arithmetic: (except `pico_float_pico_vfp`)
|
||||
*
|
||||
* __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub
|
||||
*
|
||||
* - comparison: (except `pico_float_pico_vfp`)
|
||||
*
|
||||
* __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun
|
||||
*
|
||||
* - (u)int32 <-> float: (except `pico_float_pico_vfp`)
|
||||
*
|
||||
* __aeabi_i2f, __aeabi_ui2f, __aeabi_f2iz, __aeabi_f2uiz
|
||||
*
|
||||
* - (u)int64 <-> float: (except `pico_float_pico_vfp`)
|
||||
*
|
||||
* __aeabi_l2f, __aeabi_ul2f, __aeabi_f2lz, __aeabi_f2ulz
|
||||
*
|
||||
* - float -> double: (except `pico_float_pico_vfp`)
|
||||
*
|
||||
* __aeabi_f2d
|
||||
*
|
||||
* - basic trigonometric:
|
||||
*
|
||||
* sqrtf, cosf, sinf, tanf, atan2f, expf, logf
|
||||
*
|
||||
* - trigonometric and scientific
|
||||
*
|
||||
* ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf
|
||||
*
|
||||
* - GNU extensions:
|
||||
*
|
||||
* powintf, sincosf
|
||||
*
|
||||
* On Arm, the following additional optimized functions are also provided (when using `_pico` variants of `pico_float`):
|
||||
*
|
||||
* - Conversions to/from integer types:
|
||||
*
|
||||
* - (u)int -> float (round to nearest):
|
||||
*
|
||||
* int2float, uint2float, int642float, uint642float
|
||||
*
|
||||
* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they map to inline VFP code
|
||||
*
|
||||
* - float -> (u)int (round towards zero):
|
||||
*
|
||||
* float2int_z, float2uint_z, float2int64_z, float2uint64_z
|
||||
*
|
||||
* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they map to inline VFP code
|
||||
*
|
||||
* - float -> (u)int (round towards -infinity):
|
||||
*
|
||||
* float2int, float2uint, float2int64, float2uint64
|
||||
*
|
||||
* - Conversions to/from fixed point integers:
|
||||
*
|
||||
* - (u)fix -> float (round to nearest):
|
||||
*
|
||||
* fix2float, ufix2float, fix642float, ufix642float
|
||||
*
|
||||
* - float -> (u)fix (round towards zero):
|
||||
*
|
||||
* float2fix_z, float2ufix_z, float2fix64_z, float2ufix64_z
|
||||
*
|
||||
* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they can map to inline VFP code
|
||||
* when the number of fractional bits is a compile time constant between 1 and 32
|
||||
*
|
||||
* - float -> (u)fix (round towards -infinity):
|
||||
*
|
||||
* float2fix, float2ufix, float2fix64, float2ufix64
|
||||
*
|
||||
* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they can map to inline VFP code
|
||||
* when the number of fractional bits is a compile time constant between 1 and 32
|
||||
*
|
||||
* - Even faster versions of divide and square-root functions that do not round correctly: (`pico_float_pico_dcp` only)
|
||||
*
|
||||
* fdiv_fast, sqrtf_fast
|
||||
*
|
||||
* \if rp2350_specific
|
||||
* On RISC-V, (replacement) optimized implementations are provided for the following compiler built-ins when using the `pico_float_pico`
|
||||
* library (note that there are no variants of this library like there are on Arm):
|
||||
*
|
||||
* - basic arithmetic:
|
||||
*
|
||||
* __addsf3, __subsf3, __mulsf3
|
||||
* \endif
|
||||
*/
|
||||
|
||||
// None of these functions are available on RISC-V:
|
||||
#if !defined(__riscv) || PICO_COMBINED_DOCS
|
||||
|
||||
float int2float(int32_t f);
|
||||
float uint2float(uint32_t f);
|
||||
float int642float(int64_t f);
|
||||
float uint642float(uint64_t f);
|
||||
#if PICO_COMBINED_DOCS || !LIB_PICO_FLOAT_COMPILER
|
||||
float int2float(int32_t i);
|
||||
float uint2float(uint32_t i);
|
||||
float int642float(int64_t i);
|
||||
float uint642float(uint64_t i);
|
||||
float fix2float(int32_t m, int e);
|
||||
float ufix2float(uint32_t m, int e);
|
||||
float fix642float(int64_t m, int e);
|
||||
float ufix642float(uint64_t m, int e);
|
||||
|
||||
// These methods round towards -Infinity.
|
||||
int32_t float2fix(float f, int e);
|
||||
uint32_t float2ufix(float f, int e);
|
||||
int64_t float2fix64(float f, int e);
|
||||
uint64_t float2ufix64(float f, int e);
|
||||
int32_t float2int(float f);
|
||||
uint32_t float2uint(float f);
|
||||
int64_t float2int64(float f);
|
||||
uint64_t float2uint64(float f);
|
||||
|
||||
// These methods round towards 0.
|
||||
// These methods round towards 0, which IS the C way
|
||||
int32_t float2int_z(float f);
|
||||
int64_t float2int64_z(float f);
|
||||
// Converts a float to an unsigned 32-bit integer, rounding towards 0; the result is unsigned,
// so the prototype returns uint32_t (matching the (uint32_t) cast used by the VFP macro form).
uint32_t float2uint_z(float f);
|
||||
// Converts a float to an unsigned 64-bit integer, rounding towards 0; the result is unsigned,
// so the prototype returns uint64_t rather than int64_t.
uint64_t float2uint64_z(float f);
|
||||
int32_t float2fix_z(float f, int e);
|
||||
uint32_t float2ufix_z(float f, int e);
|
||||
int64_t float2fix64_z(float f, int e);
|
||||
uint64_t float2ufix64_z(float f, int e);
|
||||
|
||||
// These methods round towards -Infinity - which IS NOT the C way for negative numbers;
|
||||
// as such the naming is not ideal, however is kept for backwards compatibility
|
||||
int32_t float2int(float f);
|
||||
uint32_t float2uint(float f);
|
||||
int64_t float2int64(float f);
|
||||
uint64_t float2uint64(float f);
|
||||
int32_t float2fix(float f, int e);
|
||||
uint32_t float2ufix(float f, int e);
|
||||
int64_t float2fix64(float f, int e);
|
||||
uint64_t float2ufix64(float f, int e);
|
||||
|
||||
#if LIB_PICO_FLOAT_PICO_VFP
|
||||
// a bit of a hack to inline VFP fixed point conversion when exponent is constant and in range 1-32
|
||||
// Each macro below shadows the function of the same name. __builtin_choose_expr selects on
// __builtin_constant_p(e): when e is a compile-time constant, the result is the _xxx_inline
// form if 1 <= e <= 32 and the out-of-line function otherwise; when e is not constant, the
// out-of-line function is always used. The "a ## b" pastes rebuild the function's own name
// inside the macro body.
#define fix2float(m, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _fix2float_inline(m, e) : fix2 ## float(m, e), fix2 ## float(m, e))
|
||||
#define ufix2float(m, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _ufix2float_inline(m, e) : ufix2 ## float(m, e), ufix2 ## float(m, e))
|
||||
#define float2fix_z(f, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _float2fix_z_inline(f, e) : float2 ## fix_z(f, e), float2 ## fix_z(f, e))
|
||||
#define float2ufix_z(f, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _float2ufix_z_inline(f, e) : float2 ## ufix_z(f, e), float2 ## ufix_z(f, e))
|
||||
#define float2fix(f, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _float2fix_inline(f, e) : float2 ## fix(f, e), float2 ## fix(f, e))
|
||||
#define float2ufix(f, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _float2ufix_inline(f, e) : float2 ## ufix(f, e), float2 ## ufix(f, e))
|
||||
|
||||
// Inline signed fixed-point -> float conversion. m is the signed 32-bit fixed-point value and
// e is the number of fractional bits, passed to the instruction as an immediate ("i"), so it
// must be a compile-time constant. The value is moved into the output register with vmov and
// converted in place with vcvt.f32.s32; the statement expression yields the resulting float.
#define _fix2float_inline(m, e) ({ \
|
||||
int32_t _m = m; \
|
||||
float f; \
|
||||
pico_default_asm( \
|
||||
"vmov %0, %1\n" \
|
||||
"vcvt.f32.s32 %0, %0, %2\n" \
|
||||
: "=t" (f) \
|
||||
: "r" (_m), "i" (e) \
|
||||
); \
|
||||
f; \
|
||||
})
|
||||
// Inline unsigned fixed-point -> float conversion. Same shape as the signed variant, but the
// input is held as uint32_t and the conversion instruction is vcvt.f32.u32. e is the number
// of fractional bits and is passed as an immediate ("i"), so it must be a compile-time constant.
#define _ufix2float_inline(m, e) ({ \
|
||||
uint32_t _m = m; \
|
||||
float f; \
|
||||
pico_default_asm( \
|
||||
"vmov %0, %1\n" \
|
||||
"vcvt.f32.u32 %0, %0, %2\n" \
|
||||
: "=t" (f) \
|
||||
: "r" (_m), "i" (e) \
|
||||
); \
|
||||
f; \
|
||||
})
|
||||
// Inline float -> signed fixed-point conversion used by the float2fix_z macro (the "round
// towards 0" family). f is copied to a local, converted in place with vcvt.s32.f32 using e
// fractional bits (an immediate, so e must be a compile-time constant), and the resulting bit
// pattern is moved to an integer register with vmov. The statement expression yields an int32_t.
#define _float2fix_z_inline(f, e) ({ \
|
||||
int32_t _m; \
|
||||
float _f = (f); \
|
||||
pico_default_asm( \
|
||||
"vcvt.s32.f32 %0, %0, %2\n" \
|
||||
"vmov %1, %0\n" \
|
||||
: "+t" (_f), "=r" (_m) \
|
||||
: "i" (e) \
|
||||
); \
|
||||
_m; \
|
||||
})
|
||||
// Inline float -> unsigned fixed-point conversion used by the float2ufix_z macro. Same shape
// as the signed variant, but the conversion instruction is vcvt.u32.f32 and the statement
// expression yields a uint32_t. e is an immediate, so it must be a compile-time constant.
#define _float2ufix_z_inline(f, e) ({ \
|
||||
uint32_t _m; \
|
||||
float _f = (f); \
|
||||
pico_default_asm( \
|
||||
"vcvt.u32.f32 %0, %0, %2\n" \
|
||||
"vmov %1, %0\n" \
|
||||
: "+t" (_f), "=r" (_m) \
|
||||
: "i" (e) \
|
||||
); \
|
||||
_m; \
|
||||
})
|
||||
// NOTE(review): this repeats the _float2fix_z_inline definition that appears earlier in this
// section (same parameters, same body). An identical macro redefinition is accepted by the
// preprocessor, so this looks redundant rather than harmful - confirm and drop one copy.
#define _float2fix_z_inline(f, e) ({ \
|
||||
int32_t _m; \
|
||||
float _f = (f); \
|
||||
pico_default_asm( \
|
||||
"vcvt.s32.f32 %0, %0, %2\n" \
|
||||
"vmov %1, %0\n" \
|
||||
: "+t" (_f), "=r" (_m) \
|
||||
: "i" (e) \
|
||||
); \
|
||||
_m; \
|
||||
})
|
||||
// Inline float -> signed fixed-point conversion used by the float2fix macro (the "round
// towards -infinity" family). e is the number of fractional bits and is used as an immediate,
// so it must be a compile-time constant.
//
// Operands: %0 is the float value, %1 is the same bits viewed as an integer (both members of
// the union _u), %2 is the result rc, %3 is the scratch register tmp, %4 is e.
//
// The value is first converted with vcvt.s32.f32; the raw float bits are then inspected and
// 1 is subtracted from the result when the input is negative and has a non-zero fractional
// part at this scale.
#define _float2fix_inline(f, e) ({ \
|
||||
union { float _f; int32_t _i; } _u; \
|
||||
_u._f = (f); \
|
||||
uint rc, tmp; \
|
||||
pico_default_asm( \
|
||||
"vcvt.s32.f32 %0, %0, %4\n" \
|
||||
"vmov %2, %0\n" \
|
||||
"lsls %1, #1\n" \
|
||||
"bls 2f\n" /* positive or zero or -zero are ok with the result we have */ \
|
||||
"lsrs %3, %1, #24\n" \
|
||||
"subs %3, #0x7f - %c4\n" \
|
||||
"bcc 1f\n" /* 0 < abs(f) < 2^-e (i.e. abs(f) * 2^e < 1), so need to round down */ \
|
||||
/* shift out the exponent and integer bits, leaving only the fractional bits */ \
|
||||
"lsls %1, %3\n" \
|
||||
"lsls %1, #8\n" \
|
||||
"beq 2f\n" /* no fractional bits set: the value is an integer at this scale, nothing to adjust */ \
|
||||
"1:\n" \
|
||||
/* need to subtract 1 from the result to round towards -infinity... */ \
|
||||
/* this will never cause an overflow, because to get here we must have had a non integer/infinite value which */ \
|
||||
/* therefore cannot have been equal to INT32_MIN (the result is 32-bit) when rounded towards zero */ \
|
||||
"subs %2, #1\n" \
|
||||
"2:\n" \
|
||||
: "+t" (_u._f), "+r" (_u._i), "=r" (rc), "=r" (tmp) \
|
||||
: "i" (e) \
|
||||
); \
|
||||
rc; \
|
||||
})
|
||||
// The inline unsigned "round towards -infinity" conversion simply forwards to the
// "round towards 0" variant; presumably the two agree for every representable unsigned
// result - TODO confirm the behaviour for negative inputs.
#define _float2ufix_inline(f, e) _float2ufix_z_inline((f), (e))
|
||||
#endif
|
||||
|
||||
#if LIB_PICO_FLOAT_PICO_VFP
|
||||
// may as well provide inline macros for VFP
|
||||
#define int2float(i) ((float)(int32_t)(i))
|
||||
#define uint2float(i) ((float)(uint32_t)(i))
|
||||
#define float2int_z(f) ((int32_t)(f))
|
||||
#define float2uint_z(f) ((uint32_t)(f))
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
float exp10f(float x);
|
||||
void sincosf(float x, float *sinx, float *cosx);
|
||||
float powintf(float x, int y);
|
||||
|
||||
#if !PICO_RP2040 || PICO_COMBINED_DOCS
|
||||
int64_t float2fix64_z(float f, int e);
|
||||
float fdiv_fast(float n, float d);
|
||||
float fsqrt_fast(float f);
|
||||
float sqrtf_fast(float f);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__riscv) || LIB_PICO_FLOAT_COMPILER
|
||||
// when using the compiler or RISC-V, we provide as many functions as we trivially can - these will be efficient
|
||||
// when using hard-float on Arm
|
||||
/* Signed 32-bit integer to float, expressed as a plain C conversion. */
static inline float int2float(int32_t i) {
    float converted = (float)i;
    return converted;
}
|
||||
/* Unsigned 32-bit integer to float, expressed as a plain C conversion. */
static inline float uint2float(uint32_t i) {
    float converted = (float)i;
    return converted;
}
|
||||
/* Signed 64-bit integer to float, expressed as a plain C conversion. */
static inline float int642float(int64_t i) {
    float converted = (float)i;
    return converted;
}
|
||||
/* Unsigned 64-bit integer to float, expressed as a plain C conversion. */
static inline float uint642float(uint64_t i) {
    float converted = (float)i;
    return converted;
}
|
||||
|
||||
/* Float to signed 32-bit integer using the C conversion (rounds towards 0). */
static inline int32_t float2int_z(float f) {
    int32_t truncated = (int32_t)f;
    return truncated;
}
|
||||
/* Float to signed 64-bit integer using the C conversion (rounds towards 0). */
static inline int64_t float2int64_z(float f) {
    int64_t truncated = (int64_t)f;
    return truncated;
}
|
||||
/*
 * Float to unsigned 32-bit integer using the C conversion (rounds towards 0).
 * The return type is uint32_t so that results of 2^31 and above are not
 * reinterpreted as negative values by callers.
 */
static inline uint32_t float2uint_z(float f) { return (uint32_t)f; }
|
||||
/*
 * Float to unsigned 64-bit integer using the C conversion (rounds towards 0).
 * The return type is uint64_t so that results of 2^63 and above are not
 * reinterpreted as negative values by callers.
 */
static inline uint64_t float2uint64_z(float f) { return (uint64_t)f; }
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -85,3 +85,12 @@ filegroup(
|
||||
name = "m33",
|
||||
srcs = ["m33.c"],
|
||||
)
|
||||
|
||||
# TODO: Add these tests to the Bazel build.
|
||||
filegroup(
|
||||
name = "unsupported_tests",
|
||||
srcs = [
|
||||
"custom_double_funcs_test.c",
|
||||
"custom_float_funcs_test.c",
|
||||
],
|
||||
)
|
@@ -79,4 +79,31 @@ else ()
|
||||
target_link_libraries(m33 pico_double pico_stdlib)
|
||||
pico_add_extra_outputs(m33)
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
# Build the lists of float/double implementation names to test. Both lists always contain
# "compiler" and "pico"; on RP2350 builds that are not RISC-V, the float list additionally
# gets "pico_vfp" and "pico_dcp". Nothing extra is added to the double list here.
set(FLOAT_TYPES compiler)
|
||||
set(DOUBLE_TYPES compiler)
|
||||
list(APPEND FLOAT_TYPES pico)
|
||||
list(APPEND DOUBLE_TYPES pico)
|
||||
if (PICO_RP2350)
|
||||
if (NOT PICO_RISCV)
|
||||
list(APPEND FLOAT_TYPES pico_vfp pico_dcp)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# One executable per entry in FLOAT_TYPES, all built from custom_float_funcs_test.c; the entry
# name is both the target-name suffix and the float implementation selected for the target.
# printf is set to the "compiler" implementation for every variant.
foreach (FLOAT_TYPE IN LISTS FLOAT_TYPES)
|
||||
add_executable(custom_float_funcs_test_${FLOAT_TYPE} custom_float_funcs_test.c)
|
||||
pico_set_float_implementation(custom_float_funcs_test_${FLOAT_TYPE} ${FLOAT_TYPE})
|
||||
target_link_libraries(custom_float_funcs_test_${FLOAT_TYPE} PRIVATE pico_stdlib)
|
||||
pico_add_extra_outputs(custom_float_funcs_test_${FLOAT_TYPE})
|
||||
pico_set_printf_implementation(custom_float_funcs_test_${FLOAT_TYPE} compiler)
|
||||
endforeach ()
|
||||
|
||||
# One executable per entry in DOUBLE_TYPES, all built from custom_double_funcs_test.c; the
# entry name is both the target-name suffix and the double implementation selected for the
# target. printf is set to the "compiler" implementation for every variant.
foreach (DOUBLE_TYPE IN LISTS DOUBLE_TYPES)
|
||||
add_executable(custom_double_funcs_test_${DOUBLE_TYPE} custom_double_funcs_test.c)
|
||||
pico_set_double_implementation(custom_double_funcs_test_${DOUBLE_TYPE} ${DOUBLE_TYPE})
|
||||
target_link_libraries(custom_double_funcs_test_${DOUBLE_TYPE} PRIVATE pico_stdlib)
|
||||
pico_add_extra_outputs(custom_double_funcs_test_${DOUBLE_TYPE})
|
||||
pico_set_printf_implementation(custom_double_funcs_test_${DOUBLE_TYPE} compiler)
|
||||
endforeach ()
|
515
test/pico_float_test/custom_double_funcs_test.c
Normal file
515
test/pico_float_test/custom_double_funcs_test.c
Normal file
@@ -0,0 +1,515 @@
|
||||
#include <stdio.h>
|
||||
#include "pico/stdlib.h"
|
||||
#include "pico/double.h"
|
||||
#include "math.h"
|
||||
|
||||
#if 0
|
||||
#define printf(...) ((void)0)
|
||||
#endif
|
||||
#if 0
|
||||
#define stop() return -1
|
||||
#else
|
||||
#define stop() rc=1
|
||||
#endif
|
||||
// Check helpers. Each one tests a condition and, on failure, prints a diagnostic and then
// invokes stop(). The macro arguments appear more than once in the expansion, so an argument
// with side effects would be evaluated more than once.
//   test_assert   - fails when x is false; prints the expression text, file and line.
//   test_checkd   - compares with != and prints both values with %f.
//   test_checki   - compares with != and prints both values with %d.
//   test_checku   - compares after casting both sides to uint32_t; prints the uncast values with %u.
//   test_checki64 - compares with != and prints both values cast to int64_t with %lld.
//   test_checku64 - compares after casting both sides to uint64_t and prints them with %llu.
#define test_assert(x) ({ if (!(x)) { printf("Assertion failed: ");puts(#x);printf(" at " __FILE__ ":%d\n", __LINE__); stop(); } })
|
||||
#define test_checkd(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %f != %f\n", msg, x, expected); stop(); } })
|
||||
#define test_checki(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %d != %d\n", msg, x, expected); stop(); } })
|
||||
#define test_checku(x, expected, msg) ({ if ((uint32_t)(x) != (uint32_t)(expected)) { printf(" %s: %u != %u\n", msg, x, expected); stop(); } })
|
||||
#define test_checki64(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %lld != %lld\n", msg, (int64_t)(x), (int64_t)(expected)); stop(); } })
|
||||
#define test_checku64(x, expected, msg) ({ if ((uint64_t)(x) != (uint64_t)(expected)) { printf(" %s: %llu != %llu\n", msg, (uint64_t)(x), (uint64_t)(expected)); stop(); } })
|
||||
|
||||
#if !(LIB_PICO_DOUBLE_COMPILER || defined(__riscv))
|
||||
// Thin wrappers that call fix2double with a fixed, literal number of fractional bits
// (8, 12, 16, 24, 28 and 32), so each call site passes a compile-time constant exponent.
static inline double fix2double_8(int32_t m) { return fix2double(m, 8); }
|
||||
static inline double fix2double_12(int32_t m) { return fix2double(m, 12); }
|
||||
static inline double fix2double_16(int32_t m) { return fix2double(m, 16); }
|
||||
static inline double fix2double_24(int32_t m) { return fix2double(m, 24); }
|
||||
static inline double fix2double_28(int32_t m) { return fix2double(m, 28); }
|
||||
static inline double fix2double_32(int32_t m) { return fix2double(m, 32); }
|
||||
|
||||
// Calls ufix2double with a literal 12 fractional bits.
// NOTE(review): the parameter is declared int32_t although this wraps the unsigned
// conversion - confirm whether uint32_t was intended.
static inline double ufix2double_12(int32_t m) { return ufix2double(m, 12); }
|
||||
|
||||
// Calls double2fix with a literal 12 fractional bits.
// NOTE(review): the parameter is int32_t and the return type is double, whereas the tests in
// this file call double2fix with a double argument and check its result as an integer -
// the signature looks copied from the fix2double wrappers; confirm the intended types.
static inline double double2fix_12(int32_t m) { return double2fix(m, 12); }
|
||||
|
||||
// Calls double2ufix with a literal 12 fractional bits.
// NOTE(review): the parameter is int32_t and the return type is double, whereas the tests in
// this file call double2ufix with a double argument and check its result as an unsigned
// integer - confirm the intended types.
static inline double double2ufix_12(int32_t m) { return double2ufix(m, 12); }
|
||||
#endif
|
||||
|
||||
#if 1 && (LIB_PICO_DOUBLE_COMPILER || defined(__riscv))
|
||||
// Wrappers used when the conversions are the compiler-provided versions (or on RISC-V).
// Each one copies its argument into a local, passes that local through an empty asm statement
// as a read-write register operand, and then calls the real function, whose name is rebuilt
// with ## inside the macro body.
// Presumably the empty asm hides the value from the optimizer so the conversion is actually
// performed at run time instead of being constant-folded - TODO confirm.
#define double2int_z(f) ({ double _d = f; pico_default_asm_volatile("" : "+r" (_d)); double2 ## int_z(_d); })
|
||||
#define double2uint_z(f) ({ double _d = f; pico_default_asm_volatile("" : "+r" (_d)); double2 ## uint_z(_d); })
|
||||
#define double2int64_z(f) ({ double _d = f; pico_default_asm_volatile("" : "+r" (_d)); double2 ## int64_z(_d); })
|
||||
#define double2uint64_z(f) ({ double _d = f; pico_default_asm_volatile("" : "+r" (_d)); double2 ## uint64_z(_d); })
|
||||
#define int2double(i) ({ int32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int2 ## double(_i); })
|
||||
#define uint2double(i) ({ uint32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint2 ## double(_i); })
|
||||
#define int642double(i) ({ int64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int642 ## double(_i); })
|
||||
#define uint642double(i) ({ uint64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint642 ## double(_i); })
|
||||
#endif
|
||||
|
||||
int test() {
|
||||
int rc = 0;
|
||||
#if LIB_PICO_DOUBLE_PICO
|
||||
printf(">>> Using PICO\n");
|
||||
#endif
|
||||
printf("int2double\n");
|
||||
test_checkd(int2double(0), 0.0, "int2double1");
|
||||
test_checkd(int2double(-1), -1.0, "int2double2");
|
||||
test_checkd(int2double(1), 1.0, "int2double3");
|
||||
test_checkd(int2double(INT32_MAX), 2147483647.0, "int2double4");
|
||||
test_checkd(int2double(INT32_MIN), -2147483648.0, "int2double5");
|
||||
// these have rounding behavior on float but not double
|
||||
test_checkd(int2double(2147483391), 2147483391.0, "int2double6");
|
||||
test_checkd(int2double(2147483391), 2147483391.0, "int2double7");
|
||||
test_checkd(int2double(2147483457), 2147483457.0, "int2double8");
|
||||
test_checkd(int2double(2147483483), 2147483483.0, "int2double9");
|
||||
test_checkd(int2double(2147483584), 2147483584.0, "int2double10");
|
||||
|
||||
printf("uint2double\n");
|
||||
test_checkd(uint2double(0), 0.0, "uint2double1");
|
||||
test_checkd(uint2double(1), 1.0, "uint2double2");
|
||||
test_checkd(uint2double(INT32_MAX), 2147483647.0, "uint2double3");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checkd(uint2double(UINT32_MAX), 4294967295.0, "uint2double4");
|
||||
|
||||
printf("int642double\n");
|
||||
test_checkd(int642double(0), 0.0, "int642double1");
|
||||
test_checkd(int642double(-1), -1.0, "int642double2");
|
||||
test_checkd(int642double(1), 1.0, "int642double3");
|
||||
test_checkd(int642double(INT32_MAX-1), 2147483646.0, "int642double4");
|
||||
test_checkd(int642double(INT32_MAX), 2147483647.0, "int642double5");
|
||||
test_checkd(int642double(INT32_MAX+1ll), 2147483648.0, "int642double6");
|
||||
test_checkd(int642double(INT32_MIN-1ll), -2147483649.0, "int642double7");
|
||||
test_checkd(int642double(INT32_MIN), -2147483648.0, "int642double8");
|
||||
test_checkd(int642double(INT32_MIN+1ll), -2147483647.0, "int642double9");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checkd(int642double(INT64_MAX), 9223372036854775807.0, "int642double10");
|
||||
test_checkd(int642double(INT64_MIN), -9223372036854775808.0, "int642doubl11e");
|
||||
|
||||
printf("uint642double\n");
|
||||
test_checkd(uint642double(0), 0.0, "uint642double1");
|
||||
test_checkd(uint642double(1), 1.0, "uint642double2");
|
||||
test_checkd(uint642double(INT32_MAX-1), 2147483646.0, "uint642double3");
|
||||
test_checkd(uint642double(INT32_MAX), 2147483647.0, "uint642double4");
|
||||
test_checkd(uint642double(INT32_MAX+1ll), 2147483648.0, "uint642double5");
|
||||
test_checkd(uint642double(INT64_MAX), 9223372036854775807.0, "uint642double6");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checkd(uint642double(UINT64_MAX), 18446744073709551615.0, "uint642double7");
|
||||
|
||||
union {
|
||||
uint64_t u;
|
||||
double d;
|
||||
} u64d;
|
||||
|
||||
#if !(LIB_PICO_DOUBLE_COMPILER || defined(__riscv))
|
||||
printf("fix2double\n");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checkd(fix2double(-3, 1), -1.5, "fix2double1");
|
||||
test_checkd(fix2double(-3, 1), -1.5, "fix2double2");
|
||||
test_checkd(fix2double(-3, -4), -48.0, "fix2double3");
|
||||
|
||||
printf("ufix2double\n");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checkd(ufix2double(0xa0000000, 30), 2.5, "ufix2double1");
|
||||
test_checkd(ufix2double(3, -4), 48.0, "ufix2double2");
|
||||
|
||||
printf("fix64double\n");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checkd(fix642double(-0xa000000000ll, 38), -2.5, "fix642double1");
|
||||
test_checkd(fix642double(-3, -34), -51539607552.0, "fix642double2");
|
||||
|
||||
printf("ufix642double\n");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checkd(ufix642double(0xa000000000ll, 38), 2.5, "ufix642double1");
|
||||
test_checkd(ufix642double(3, -34), 51539607552.0, "fix64double2");
|
||||
|
||||
test_checkd(fix2double_8(128), 0.5, "fix2double_8_1");
|
||||
test_checkd(fix2double_8(-128), -0.5, "fix2double_8_2");
|
||||
test_checkd(fix2double_16(8192), 0.125, "fix2double_8_3");
|
||||
test_checkd(fix2double_16(-8192), -0.125, "fix2double_8_4");
|
||||
test_checkd(fix2double_24(3<<23), 1.5, "fix2double_8_5");
|
||||
test_checkd(fix2double_24(-(3<<23)), -1.5, "fix2double_8_6");
|
||||
|
||||
printf("double2fix\n");
|
||||
test_checki(double2fix(-0.5, 8), -0x80, "double2fix0");
|
||||
test_checki(double2fix(3.5, 8), 0x380, "double2fix1");
|
||||
test_checki(double2fix(-3.5, 8), -0x380, "double2fix2");
|
||||
test_checki(double2fix(32768.0, 16), INT32_MAX, "double2fix3");
|
||||
test_checki(double2fix(65536.0, 16), INT32_MAX, "double2fix4");
|
||||
test_checki(double2fix(-65536.0, 16), INT32_MIN, "double2fix4b");
|
||||
test_checki(double2fix(INFINITY, 16), INT32_MAX, "double2fix5");
|
||||
test_checki(double2fix(-INFINITY, 16), INT32_MIN, "double2fix5b");
|
||||
test_checki(double2fix(INFINITY, -16), INT32_MAX, "double2fix5c");
|
||||
test_checki(double2fix(-INFINITY, -16), INT32_MIN, "double2fix5d");
|
||||
test_checki(double2fix(3.24999, 2), 12, "double2fix6");
|
||||
test_checki(double2fix(3.25, 2), 13, "double2fix7");
|
||||
test_checki(double2fix(-3.24999, 2), -13, "double2fix8");
|
||||
test_checki(double2fix(-3.25, 2), -13, "double2fix9");
|
||||
test_checki(double2fix(-0.75, 1), -2, "double2fix10");
|
||||
test_checki(double2fix(-3.0, -1), -2, "double2fix11"); // not very useful
|
||||
test_checki(double2fix(0.0, 16), 0, "double2fix12");
|
||||
test_checki(double2fix(-0.0, 16), 0, "double2fix13");
|
||||
test_checki(double2fix(0.0, -16), 0, "double2fix14");
|
||||
test_checki(double2fix(-0.0, -16), 0, "double2fix15");
|
||||
|
||||
printf("double2ufix\n");
|
||||
test_checku(double2ufix(3.5, 8), 0x380, "double2ufix1");
|
||||
test_checku(double2ufix(-3.5, 8), 0, "double2ufix2");
|
||||
test_checku(double2ufix(32768.0, 16), 32768 << 16, "double2ufix3");
|
||||
test_checku(double2ufix(65536.0, 16), UINT32_MAX, "double2ufix4");
|
||||
test_checku(double2ufix(INFINITY, 16), UINT32_MAX, "double2ufix5");
|
||||
test_checku(double2ufix(-INFINITY, 16), 0, "double2ufix5b");
|
||||
test_checku(double2ufix(INFINITY, -16), UINT32_MAX, "double2ufix5c");
|
||||
test_checku(double2ufix(-INFINITY, -16), 0, "double2ufix5d");
|
||||
test_checku(double2ufix(3.24999, 2), 12, "double2ufix6");
|
||||
test_checku(double2ufix(3.25, 2), 13, "double2ufix7");
|
||||
test_checku(double2ufix(3.0, -1), 1, "double2ufix8"); // not very useful
|
||||
test_checki(double2ufix(0.0, 16), 0, "double2ufix12");
|
||||
test_checki(double2ufix(-0.0, 16), 0, "double2fix13");
|
||||
test_checki(double2ufix(0.0, -16), 0, "double2ufix14");
|
||||
test_checki(double2ufix(-0.0, -16), 0, "double2fix15");
|
||||
|
||||
printf("double2fix64\n");
|
||||
test_checki64(double2fix64(3.5, 8), 0x380, "double2fix641");
|
||||
test_checki64(double2fix64(-3.5, 8), -0x380, "double2fix642");
|
||||
test_checki64(double2fix64(32768.0, 16), 32768ll << 16, "double2fix643");
|
||||
test_checki64(double2fix64(65536.0, 16), 65536ll << 16, "double2fix644");
|
||||
test_checki64(double2fix64(2147483648.0, 16), 2147483648ll << 16, "double2ufix644b");
|
||||
test_checki64(double2fix64(65536.0 * 65536.0 * 32768.0, 16), INT64_MAX, "double2fix644c");
|
||||
test_checki64(double2fix64(INFINITY, 16), INT64_MAX, "double2fix645");
|
||||
test_checki64(double2fix64(-INFINITY, 16), INT64_MIN, "double2fix645b");
|
||||
test_checki64(double2fix64(INFINITY, -16), INT64_MAX, "double2fix645c");
|
||||
test_checki64(double2fix64(-INFINITY, -16), INT64_MIN, "double2fix645d");
|
||||
test_checki64(double2fix64(3.24999, 2), 12, "double2fix646");
|
||||
test_checki64(double2fix64(3.25, 2), 13, "double2fix647");
|
||||
test_checki64(double2fix64(-3.24999, 2), -13, "double2fix648");
|
||||
test_checki64(double2fix64(-3.25, 2), -13, "double2fix649");
|
||||
test_checki64(double2fix64(-3.0, -1), -2, "double2fix6410"); // not very useful
|
||||
test_checki64(double2fix64(2147483648.0 * 2147483648.0, 16), INT64_MAX, "double2ufix6411");
|
||||
test_checki64(double2fix64(0.0, 16), 0, "double2fix6412");
|
||||
test_checki64(double2fix64(-0.0, 16), 0, "double2fix6413");
|
||||
test_checki64(double2fix64(0.0, -16), 0, "double2fix6412b");
|
||||
test_checki64(double2fix64(-0.0, -16), 0, "double2fix6413b");
|
||||
test_checki64(double2fix64(-3.25, 40), -13ll * (1ll << 38), "double2fix6414");
|
||||
u64d.u = 0xc00a000000000001;
|
||||
test_checki64(double2fix64(u64d.d, 40), -13ll * (1ll << 38) - 1ll, "double2fix6414b");
|
||||
|
||||
u64d.u = 0xc00a000080000001;
|
||||
test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18) - 2ll, "double2fix6415c");
|
||||
u64d.u = 0xc00a000080000000;
|
||||
test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix6415d");
|
||||
u64d.u = 0xc00a000000000001;
|
||||
test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix6415e");
|
||||
u64d.u = 0xc00a000000000000;
|
||||
test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18), "double2fix6415g");
|
||||
|
||||
u64d.u = 0xc00a000080000001;
|
||||
test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17) - 1ll, "double2fix6415h");
|
||||
u64d.u = 0xc00a000080000000;
|
||||
test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17) - 1ll, "double2fix6415i");
|
||||
u64d.u = 0xc00a000000000001;
|
||||
test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17) - 1ll, "double2fix6415j");
|
||||
u64d.u = 0xc00a000000000000;
|
||||
test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17), "double2fix6415k");
|
||||
|
||||
printf("double2ufix64\n");
|
||||
test_checku64(double2ufix64(3.5, 8), 0x380, "double2ufix641");
|
||||
test_checku64(double2ufix64(-3.5, 8), 0, "double2ufix642");
|
||||
test_checku64(double2ufix64(32768.0, 16), 32768ull << 16, "double2ufix643");
|
||||
test_checku64(double2ufix64(65536.0, 16), 65536ull << 16, "double2ufix644");
|
||||
test_checku64(double2ufix64(2147483648.0, 16), 2147483648ull << 16, "double2ufix644b");
|
||||
test_checku64(double2ufix64(INFINITY, 16), UINT64_MAX, "double2ufix645");
|
||||
test_checku64(double2ufix64(-INFINITY, 16), 0, "double2ufix645b");
|
||||
test_checku64(double2ufix64(INFINITY, -16), UINT64_MAX, "double2ufix645c");
|
||||
test_checku64(double2ufix64(-INFINITY, -16), 0, "double2ufix645d");
|
||||
test_checku64(double2ufix64(3.24999, 2), 12, "double2ufix646");
|
||||
test_checku64(double2ufix64(3.25, 2), 13, "double2ufix647");
|
||||
test_checku64(double2ufix64(3.0, -1), 1, "double2ufix648"); // not very useful
|
||||
test_checki64(double2ufix64(0.0, 16), 0, "double2ufix649");
|
||||
test_checki64(double2ufix64(-0.0, 16), 0, "double2ufix6410");
|
||||
|
||||
printf("double2fix_z\n");
|
||||
test_checki(double2fix_z(3.5, 8), 0x380, "double2fix_z1");
|
||||
test_checki(double2fix_z(-3.5, 8), -0x380, "double2fix_z2");
|
||||
test_checki(double2fix_z(32768.0, 16), INT32_MAX, "double2fix_z3");
|
||||
test_checki(double2fix_z(65536.0, 16), INT32_MAX, "double2fix_z4");
|
||||
test_checki(double2fix_z(INFINITY, 16), INT32_MAX, "double2fix_z5");
|
||||
test_checki(double2fix_z(-INFINITY, 16), INT32_MIN, "double2fix_z5b");
|
||||
test_checki(double2fix_z(INFINITY, -50), INT32_MAX, "double2fix_z5c");
|
||||
test_checki(double2fix_z(-INFINITY, -50), INT32_MIN, "double2fix_z5d");
|
||||
test_checki(double2fix_z(3.24999, 2), 12, "double2fix_z6");
|
||||
test_checki(double2fix_z(3.25, 2), 13, "double2fix_z7");
|
||||
test_checki(double2fix_z(-3.24999, 2), -12, "double2fix_z8");
|
||||
test_checki(double2fix_z(-3.25, 2), -13, "double2fix_z9");
|
||||
test_checki(double2fix_z(-0.75, 1), -1, "double2fix_z10");
|
||||
test_checki(double2fix_z(-3.0, -1), -1, "double2fix_z11"); // not very useful
|
||||
test_checki(double2fix_z(0.0, 16), 0, "double2fix_z12");
|
||||
test_checki(double2fix_z(-0.0, 16), 0, "double2fix_z13");
|
||||
test_checki(double2fix_z(0.0, -16), 0, "double2fix_z12b");
|
||||
test_checki(double2fix_z(-0.0, -16), 0, "double2fix_z13b");
|
||||
|
||||
printf("double2ufix_z\n");
|
||||
test_checku(double2ufix_z(3.5, 8), 0x380, "double2ufix_z1");
|
||||
test_checku(double2ufix_z(-3.5, 8), 0, "double2ufix_z2");
|
||||
test_checku(double2ufix_z(32768.0, 16), 32768 << 16, "double2ufix_z3");
|
||||
test_checku(double2ufix_z(65536.0, 16), UINT32_MAX, "double2ufix_z4");
|
||||
test_checku(double2ufix_z(INFINITY, 16), UINT32_MAX, "double2ufix_z5");
|
||||
test_checku(double2ufix_z(-INFINITY, 16), 0, "double2ufix_z5b");
|
||||
test_checku(double2ufix_z(INFINITY, 16), UINT32_MAX, "double2ufix_z5c");
|
||||
test_checku(double2ufix_z(-INFINITY, 16), 0, "double2ufix_z5d");
|
||||
test_checku(double2ufix_z(3.24999, 2), 12, "double2ufix_z6");
|
||||
test_checku(double2ufix_z(3.25, 2), 13, "double2ufix_z7");
|
||||
test_checku(double2ufix_z(3.0, -1), 1, "double2ufix_z8"); // not very useful
|
||||
test_checki(double2ufix_z(0.0, 16), 0, "double2fix_z9");
|
||||
test_checki(double2ufix_z(-0.0, 16), 0, "double2fix_z10");
|
||||
test_checki(double2ufix_z(0.0, -16), 0, "double2fix_z11");
|
||||
test_checki(double2ufix_z(-0.0, -16), 0, "double2fix_z12");
|
||||
|
||||
printf("double2fix64_z\n");
|
||||
test_checki64(double2fix64_z(3.5, 8), 0x380, "double2fix64_z1");
|
||||
test_checki64(double2fix64_z(-3.5, 8), -0x380, "double2fix64_z2");
|
||||
test_checki64(double2fix64_z(32768.0, 16), 32768ll << 16, "double2fix64_z3");
|
||||
test_checki64(double2fix64_z(65536.0, 16), 65536ll << 16, "double2fix64_z4");
|
||||
test_checki64(double2fix64_z(65536.0 * 65536.0 * 32768.0, 16), INT64_MAX, "double2fix64_z4b");
|
||||
test_checki64(double2fix64_z(INFINITY, 16), INT64_MAX, "double2fix64_z5");
|
||||
test_checki64(double2fix64_z(-INFINITY, 16), INT64_MIN, "double2fix64_z5");
|
||||
test_checki64(double2fix64_z(INFINITY, 16), INT64_MAX, "double2fix64_z5");
|
||||
test_checki64(double2fix64_z(-INFINITY, 16), INT64_MIN, "double2fix64_z5");
|
||||
test_checki64(double2fix64_z(3.24999, 2), 12, "double2fix64_z6");
|
||||
test_checki64(double2fix64_z(3.25, 2), 13, "double2fix64_z7");
|
||||
test_checki64(double2fix64_z(-3.24999, 2), -12, "double2fix64_z8");
|
||||
test_checki64(double2fix64_z(-3.25, 2), -13, "double2fix64_z9");
|
||||
test_checki64(double2fix64_z(-3.0, -1), -1, "double2fix64_z10"); // not very useful
|
||||
test_checki64(double2fix64_z(0.0, 16), 0, "double2fix64_z11");
|
||||
test_checki64(double2fix64_z(-0.0, 16), 0, "double2fix64_z12");
|
||||
test_checki64(double2fix64_z(0.0, -16), 0, "double2fix64_z13");
|
||||
test_checki64(double2fix64_z(-0.0, -16), 0, "double2fix64_z14");
|
||||
test_checki64(double2fix64_z(-3.25, 40), -13ll * (1ll << 38), "double2fix64_z15");
|
||||
u64d.u = 0xc00a000000000001;
|
||||
test_checki64(double2fix64_z(u64d.d, 40), -13ll * (1ll << 38), "double2fix64_z15b");
|
||||
|
||||
u64d.u = 0xc00a000080000001;
|
||||
test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix64_z15c");
|
||||
u64d.u = 0xc00a000080000000;
|
||||
test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix64_z15d");
|
||||
u64d.u = 0xc00a000000000001;
|
||||
test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18), "double2fix64_z15e");
|
||||
u64d.u = 0xc00a000000000000;
|
||||
test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18), "double2fix64_z15g");
|
||||
|
||||
u64d.u = 0xc00a000080000001;
|
||||
test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15h");
|
||||
u64d.u = 0xc00a000080000000;
|
||||
test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15i");
|
||||
u64d.u = 0xc00a000000000001;
|
||||
test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15j");
|
||||
u64d.u = 0xc00a000000000000;
|
||||
test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15k");
|
||||
|
||||
printf("double2ufix64_z\n");
|
||||
test_checku64(double2ufix64_z(3.5, 8), 0x380, "double2ufix64_z1");
|
||||
test_checku64(double2ufix64_z(-3.5, 8), 0, "double2ufix64_z2");
|
||||
test_checku64(double2ufix64_z(32768.0, 16), 32768ll << 16, "double2ufix64_z3");
|
||||
test_checku64(double2ufix64_z(65536.0, 16), 65536ll << 16, "double2ufix64_z4");
|
||||
test_checki64(double2ufix64_z(65536.0 * 65536.0 * 65536.0, 16), UINT64_MAX, "double2fix64_z4b");
|
||||
test_checku64(double2ufix64_z(INFINITY, 16), UINT64_MAX, "double2ufix64_z5");
|
||||
test_checku64(double2ufix64_z(-INFINITY, 16), 0, "double2ufix64_z5b");
|
||||
test_checku64(double2ufix64_z(INFINITY, 16), UINT64_MAX, "double2ufix64_z5c");
|
||||
test_checku64(double2ufix64_z(-INFINITY, 16), 0, "double2ufix64_z5d");
|
||||
test_checku64(double2ufix64_z(3.24999, 2), 12, "double2ufix64_z6");
|
||||
test_checku64(double2ufix64_z(3.25, 2), 13, "double2ufix64_z7");
|
||||
test_checki64(double2ufix64_z(3.0, -1), 1, "double2fuix64_z8"); // not very useful
|
||||
test_checki64(double2ufix64_z(0.0, 16), 0, "double2ufix64_z9");
|
||||
test_checki64(double2ufix64_z(-0.0, 16), 0, "double2ufix64_z10");
|
||||
test_checki64(double2ufix64_z(0.0, -16), 0, "double2ufix64_z11");
|
||||
test_checki64(double2ufix64_z(-0.0, -16), 0, "double2ufix64_z12");
|
||||
|
||||
printf("double2int\n");
|
||||
test_checki(double2int(0.0), 0, "double2int1");
|
||||
test_checki(double2int(0.25), 0, "double2int1b");
|
||||
test_checki(double2int(0.5), 0, "double2int2");
|
||||
test_checki(double2int(0.75), 0, "double2int2b");
|
||||
test_checki(double2int(1.0), 1, "double2int3");
|
||||
test_checki(double2int(-10.0), -10, "double2int3a");
|
||||
test_checki(double2int(-0.0), 0, "double2int3b");
|
||||
test_checki(double2int(-0.25), -1, "double2int4");
|
||||
test_checki(double2int(-0.5), -1, "double2int4b");
|
||||
test_checki(double2int(-0.75), -1, "double2int5");
|
||||
test_checki(double2int(-1.0), -1, "double2int5b");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checki(double2int(2147483646.0), INT32_MAX-1, "double2int6");
|
||||
test_checki(double2int(2147483647.0), INT32_MAX, "double2int6b");
|
||||
test_checki(double2int(21474836470.0), INT32_MAX, "double2int7");
|
||||
test_checki(double2int(-2147483648.0), INT32_MIN, "double2int8");
|
||||
test_checki(double2int(-21474836480.0), INT32_MIN, "double2int9");
|
||||
test_checki(double2int(-2.5), -3, "double2int10");
|
||||
test_checki(double2int(-2.4), -3, "double2int11");
|
||||
u64d.u = 0xc000000000000000ull;
|
||||
test_checki(double2int(u64d.d), -2, "double2int12");
|
||||
u64d.u = 0xc008000000000000ull;
|
||||
test_checki(double2int(u64d.d), -3, "double2int12b");
|
||||
u64d.u = 0xc000000000000001ull;
|
||||
test_checki(double2int(u64d.d), -3, "double2int12c");
|
||||
u64d.u = 0xc000000080000000ull;
|
||||
test_checki(double2int(u64d.d), -3, "double2int12d");
|
||||
u64d.u = 0xc000000100000000ull;
|
||||
test_checki(double2int(u64d.d), -3, "double2int12e");
|
||||
u64d.u = 0xc000000100000001ull;
|
||||
test_checki(double2int(u64d.d), -3, "double2int12f");
|
||||
test_checki(double2int(-2147483647.0), INT32_MIN+1, "double2int13");
|
||||
test_checki(double2int(-2147483647.1), INT32_MIN, "double2int14");
|
||||
test_checki(double2int(-2147483647.9), INT32_MIN, "double2int15");
|
||||
test_checki(double2int(-2147483648.0), INT32_MIN, "double2int16");
|
||||
test_checki(double2int(-2147483648.1), INT32_MIN, "double2int17");
|
||||
test_checki(double2int(-21474836480.1), INT32_MIN, "double2int18");
|
||||
|
||||
printf("double2uint\n");
|
||||
test_checku(double2uint(0.0), 0, "double2uint1");
|
||||
test_checku(double2uint(0.25), 0, "double2uint2");
|
||||
test_checku(double2uint(0.5), 0, "double2uint3");
|
||||
test_checku(double2uint(0.75), 0, "double2uint4");
|
||||
test_checku(double2uint(1.0), 1, "double2uint5");
|
||||
test_checku(double2uint(2147483647.0), INT32_MAX, "double2uint6");
|
||||
test_checku(double2uint(2147483648.0), INT32_MAX+1u, "double2uint7");
|
||||
test_checku(double2uint(4294967294.5), UINT32_MAX-1, "double2uint8");
|
||||
test_checku(double2uint(4294967295.0), UINT32_MAX, "double2uint9");
|
||||
test_checku(double2uint(42949672950.0), UINT32_MAX, "double2uint10");
|
||||
|
||||
printf("double2int64\n");
|
||||
test_checki64(double2int64(0.0), 0, "double2int641");
|
||||
test_checki64(double2int64(0.25), 0, "double2int641b");
|
||||
test_checki64(double2int64(0.5), 0, "double2int642");
|
||||
test_checki64(double2int64(0.75), 0, "double2int642b");
|
||||
test_checki64(double2int64(1.0), 1, "double2int643");
|
||||
test_checki64(double2int64(-10.0), -10, "double2int643a");
|
||||
test_checki64(double2int64(-0.0), 0, "double2int643b");
|
||||
test_checki64(double2int64(-0.25), -1, "double2int644");
|
||||
test_checki64(double2int64(-0.5), -1, "double2int644b");
|
||||
test_checki64(double2int64(-0.75), -1, "double2int645");
|
||||
test_checki64(double2int64(-1.0), -1, "double2int645b");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checki64(double2int64(2147483647.0), INT32_MAX, "double2int646");
|
||||
test_checki64(double2int64(21474836470.0), 21474836470ll, "double2int647");
|
||||
test_checki64(double2int64(-2147483648.0), INT32_MIN, "double2int648");
|
||||
test_checki64(double2int64(-21474836480.0), -21474836480ll, "double2int649");
|
||||
test_checki64(double2int64(-2.5), -3, "double2int6410");
|
||||
test_checki64(double2int64(-2.4), -3, "double2int6411");
|
||||
u64d.u = 0xc000000000000000ull;
|
||||
test_checki64(double2int64(u64d.d), -2, "double2int6412");
|
||||
u64d.u = 0xc008000000000000ull;
|
||||
test_checki64(double2int64(u64d.d), -3, "double2int6412b");
|
||||
u64d.u = 0xc000000000000001ull;
|
||||
test_checki64(double2int64(u64d.d), -3, "double2int6412c");
|
||||
u64d.u = 0xc000000080000000ull;
|
||||
test_checki64(double2int64(u64d.d), -3, "double2int6412d");
|
||||
u64d.u = 0xc000000100000000ull;
|
||||
test_checki64(double2int64(u64d.d), -3, "double2int6412e");
|
||||
u64d.u = 0xc000000100000001ull;
|
||||
test_checki64(double2int64(u64d.d), -3, "double2int6412f");
|
||||
|
||||
printf("double2uint64\n");
|
||||
test_checku64(double2uint64(0.0), 0, "double2uint641");
|
||||
test_checku64(double2uint64(0.25), 0, "double2uint642");
|
||||
test_checku64(double2uint64(0.5), 0, "double2uint643");
|
||||
test_checku64(double2uint64(0.75), 0, "double2uint644");
|
||||
test_checku64(double2uint64(1.0), 1, "double2uint645");
|
||||
test_checku64(double2uint64(2147483647.0), INT32_MAX, "double2uint646");
|
||||
test_checku64(double2uint64(2147483648.0), INT32_MAX+1u, "double2uint647");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checku64(double2uint64(4294967294.5), 4294967294ull, "double2uint648");
|
||||
test_checku64(double2uint64(4294967295.0), 4294967295ull, "double2uint649");
|
||||
test_checku64(double2uint64(42949672950.0), 42949672950, "double2uint6410");
|
||||
#endif
|
||||
|
||||
// // These methods round towards 0.
|
||||
printf("double2int_z\n");
|
||||
test_checki(double2int_z(0.0), 0, "double2int_z1");
|
||||
test_checki(double2int_z(0.25), 0, "double2int_z1b");
|
||||
test_checki(double2int_z(0.5), 0, "double2int_z2");
|
||||
test_checki(double2int_z(0.75), 0, "double2int_z2b");
|
||||
test_checki(double2int_z(1.0), 1, "double2int_z3");
|
||||
test_checki(double2int_z(-10.0), -10, "double2int_z3a");
|
||||
test_checki(double2int_z(-0.0), 0, "double2int_z3b");
|
||||
test_checki(double2int_z(-0.25), 0, "double2int_z4");
|
||||
test_checki(double2int_z(-0.5), 0, "double2int_z4b");
|
||||
test_checki(double2int_z(-0.75), 0, "double2int_z5");
|
||||
test_checki(double2int_z(-1.0), -1, "double2int_z5b");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checki(double2int_z(2147483647.0), INT32_MAX, "double2int_z6");
|
||||
test_checki(double2int_z(21474836470.0), INT32_MAX, "double2int_z7");
|
||||
test_checki(double2int_z(-2147483648.0), INT32_MIN, "double2int_z8");
|
||||
test_checki(double2int_z(-21474836480.0), INT32_MIN, "double2int_z9");
|
||||
test_checki(double2int_z(-2.5), -2, "double2int_z10");
|
||||
test_checki(double2int_z(-2.4), -2, "double2int_z11");
|
||||
u64d.u = 0xc000000000000000ull;
|
||||
test_checki(double2int_z(u64d.d), -2, "double2int_z12");
|
||||
u64d.u = 0xc008000000000000ull;
|
||||
test_checki(double2int_z(u64d.d), -3, "double2int_z12b");
|
||||
u64d.u = 0xc000000000000001ull;
|
||||
test_checki(double2int_z(u64d.d), -2, "double2int_z12c");
|
||||
u64d.u = 0xc000000080000000ull;
|
||||
test_checki(double2int_z(u64d.d), -2, "double2int_z12d");
|
||||
u64d.u = 0xc000000100000000ull;
|
||||
test_checki(double2int_z(u64d.d), -2, "double2int_z12e");
|
||||
u64d.u = 0xc000000100000001ull;
|
||||
test_checki(double2int_z(u64d.d), -2, "double2int_z12f");
|
||||
|
||||
printf("double2int64_z\n");
|
||||
test_checki64(double2int64_z(0.0), 0, "double2int64_z1");
|
||||
test_checki64(double2int64_z(0.25), 0, "double2int64_z1b");
|
||||
test_checki64(double2int64_z(0.5), 0, "double2int64_z2");
|
||||
test_checki64(double2int64_z(0.75), 0, "double2int64_z2b");
|
||||
test_checki64(double2int64_z(1.0), 1, "double2int64_z3");
|
||||
test_checki64(double2int64_z(-10.0), -10, "double2int64_z3a");
|
||||
test_checki64(double2int64_z(-0.0), 0, "double2int64_z3b");
|
||||
test_checki64(double2int64_z(-0.25), 0, "double2int64_z4");
|
||||
test_checki64(double2int64_z(-0.5), 0, "double2int64_z4b");
|
||||
test_checki64(double2int64_z(-0.75), 0, "double2int64_z5");
|
||||
test_checki64(double2int64_z(-1.0), -1, "double2int64_z5b");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checki64(double2int64_z(2147483647.0), 2147483647ll, "double2int64_z6");
|
||||
test_checki64(double2int64_z(21474836470.0), 21474836470ll, "double2int64_z7");
|
||||
test_checki64(double2int64_z(-2147483648.0), INT32_MIN, "double2int64_z8");
|
||||
test_checki64(double2int64_z(-21474836480.0), -21474836480ll, "double2int64_z9");
|
||||
test_checki64(double2int64_z(-2.5), -2, "double2int64_z10");
|
||||
test_checki64(double2int64_z(-2.4), -2, "double2int64_z11");
|
||||
|
||||
printf("double2uint_z\n");
|
||||
test_checku(double2uint_z(0.0), 0, "double2uint_z1");
|
||||
test_checku(double2uint_z(0.25), 0, "double2uint_z2");
|
||||
test_checku(double2uint_z(0.5), 0, "double2uint_z3");
|
||||
test_checku(double2uint_z(0.75), 0, "double2uint_z4");
|
||||
test_checku(double2uint_z(1.0), 1, "double2uint_z5");
|
||||
test_checku(double2uint_z(2147483647.0), INT32_MAX, "double2uint_z6");
|
||||
test_checku(double2uint_z(2147483648.0), INT32_MAX+1u, "double2uint_z7");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checku(double2uint_z(4294967294.5), UINT32_MAX-1u, "double2uint_z8");
|
||||
test_checku(double2uint_z(4294967295.0), UINT32_MAX, "double2uint_z9");
|
||||
test_checku(double2uint_z(42949672950.0), UINT32_MAX, "double2uint_z10");
|
||||
|
||||
printf("double2uint64_z\n");
|
||||
test_checku64(double2uint64_z(0.0), 0, "double2uint64_z1");
|
||||
test_checku64(double2uint64_z(0.25), 0, "double2uint64_z2");
|
||||
test_checku64(double2uint64_z(0.5), 0, "double2uint64_z3");
|
||||
test_checku64(double2uint64_z(0.75), 0, "double2uint64_z4");
|
||||
test_checku64(double2uint64_z(1.0), 1, "double2uint64_z5");
|
||||
test_checku64(double2uint64_z(2147483647.0), INT32_MAX, "double2uint64_z6");
|
||||
test_checku64(double2uint64_z(2147483648.0), INT32_MAX+1u, "double2uint64_z7");
|
||||
// todo test correct rounding around maximum precision
|
||||
test_checku64(double2uint64_z(4294967294.5), 4294967294ull, "double2uint64_z8");
|
||||
test_checku64(double2uint64_z(4294967295.0), 4294967295ull, "double2uint64_z9");
|
||||
test_checku64(double2uint64_z(4294967296.0), 4294967296ull, "double2uint64_z9b");
|
||||
test_checku64(double2uint64_z(42949672950.0), 42949672950ull, "double2uint64_z10");
|
||||
|
||||
// double exp10(double x);
|
||||
// void sincos(double x, double *sinx, double *cosx);
|
||||
// double powint(double x, int y);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
 * Entry point for the double-precision conversion test image.
 *
 * Initialises stdio, runs test(), and prints a one-line verdict:
 * "PASSED" when test() returned zero, "FAILED" otherwise.
 */
int main() {
    stdio_init_all();

    if (test() == 0) {
        printf("PASSED\n");
    } else {
        printf("FAILED\n");
    }
    return 0;
}
|
402
test/pico_float_test/custom_float_funcs_test.c
Normal file
402
test/pico_float_test/custom_float_funcs_test.c
Normal file
@@ -0,0 +1,402 @@
|
||||
#include <stdio.h>
|
||||
#include "pico/stdlib.h"
|
||||
#include "pico/float.h"
|
||||
#include "math.h"
|
||||
|
||||
// Flip this guard to 1 to compile out every printf in the test.
#if 0
#define printf(...) ((void)0)
#endif

// stop() is what a failed check does. It expands inside the calling function,
// so it relies on that function having a local `int rc` (default variant) or
// returning int (early-return variant).
#if 0
#define stop() return -1
#else
#define stop() rc=1
#endif

// Check helpers. Each one evaluates `x` and `expected`, and on mismatch prints
// " <msg>: <x> != <expected>" and invokes stop(). They are GNU statement
// expressions so they can be used as single statements.
//
// The printed operands are cast to exactly the type the conversion specifier
// requires: the fixed-width integer types are not guaranteed to be `int` /
// `long long`, and passing a mismatched type to printf is undefined behaviour.
// The comparisons themselves are unchanged.

// Fails when the condition is false; prints the condition text and location.
#define test_assert(x) ({ if (!(x)) { printf("Assertion failed: ");puts(#x);printf(" at " __FILE__ ":%d\n", __LINE__); stop(); } })
// Exact floating-point equality check.
#define test_checkf(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %f != %f\n", msg, (double)(x), (double)(expected)); stop(); } })
// Signed integer equality check (compared at the operands' own types).
#define test_checki(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %d != %d\n", msg, (int)(x), (int)(expected)); stop(); } })
// Unsigned 32-bit equality check (both sides truncated to uint32_t first).
#define test_checku(x, expected, msg) ({ if ((uint32_t)(x) != (uint32_t)(expected)) { printf(" %s: %u != %u\n", msg, (unsigned)(x), (unsigned)(expected)); stop(); } })
// Signed 64-bit equality check.
#define test_checki64(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %lld != %lld\n", msg, (long long)(x), (long long)(expected)); stop(); } })
// Unsigned 64-bit equality check (both sides converted to uint64_t first).
#define test_checku64(x, expected, msg) ({ if ((uint64_t)(x) != (uint64_t)(expected)) { printf(" %s: %llu != %llu\n", msg, (unsigned long long)(x), (unsigned long long)(expected)); stop(); } })
|
||||
|
||||
#if !(LIB_PICO_FLOAT_COMPILER || defined(__riscv))
// Convenience wrappers that call the pico_float fixed-point conversions with
// the second (exponent) argument fixed to a constant. They are only compiled
// when the fixed-point conversion API is under test, i.e. not for the
// compiler-provided float variant and not on RISC-V.

// Signed fixed-point -> float.
static inline float fix2float_8(int32_t m) { return fix2float(m, 8); }
static inline float fix2float_12(int32_t m) { return fix2float(m, 12); }
static inline float fix2float_16(int32_t m) { return fix2float(m, 16); }
static inline float fix2float_24(int32_t m) { return fix2float(m, 24); }
static inline float fix2float_28(int32_t m) { return fix2float(m, 28); }
static inline float fix2float_32(int32_t m) { return fix2float(m, 32); }

// Unsigned fixed-point -> float: the mantissa is unsigned.
static inline float ufix2float_12(uint32_t m) { return ufix2float(m, 12); }

// float -> signed fixed-point: takes a float and yields an integer.
static inline int32_t float2fix_12(float f) { return float2fix(f, 12); }

// float -> unsigned fixed-point: takes a float and yields an unsigned integer.
static inline uint32_t float2ufix_12(float f) { return float2ufix(f, 12); }
#endif
|
||||
|
||||
// Only when the conversions come from the compiler (LIB_PICO_FLOAT_COMPILER) or
// when building for RISC-V: shadow each conversion with a same-named macro that
// routes its operand through an empty volatile asm statement before calling the
// real function.
#if 1 && (LIB_PICO_FLOAT_COMPILER || defined(__riscv))
|
||||
// FREG is the read-write ("+") asm operand constraint used for float operands:
// a general-purpose register ("r") for soft-float ABIs and RISC-V, "t" otherwise.
// NOTE(review): "t" is presumably GCC's ARM VFP single-precision register
// constraint - confirm against the GCC machine-constraints documentation.
#if __SOFTFP__ || defined(__riscv)
|
||||
#define FREG "+r"
|
||||
#else
|
||||
#define FREG "+t"
|
||||
#endif
|
||||
// prevent the compiler from eliding the calculations
// Each wrapper copies its argument into a local, marks that local as read and
// written by an empty volatile asm statement, then calls the real conversion on it.
// The `a ## b` paste spells out the underlying function name inside the macro of
// the same name.
|
||||
#define float2int_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## int_z(_f); })
|
||||
#define float2uint_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## uint_z(_f); })
|
||||
#define float2int64_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## int64_z(_f); })
|
||||
#define float2uint64_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## uint64_z(_f); })
|
||||
// Integer operands always use a general-purpose register constraint ("+r").
#define int2float(i) ({ int32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int2 ## float(_i); })
|
||||
#define uint2float(i) ({ uint32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint2 ## float(_i); })
|
||||
#define int642float(i) ({ int64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int642 ## float(_i); })
|
||||
#define uint642float(i) ({ uint64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint642 ## float(_i); })
|
||||
#endif
|
||||
|
||||
#if 1 && LIB_PICO_FLOAT_VFP
|
||||
// prevent the compiler from eliding the calculations
|
||||
#undef float2int_z
|
||||
#undef float2uint_z
|
||||
#undef int2float
|
||||
#undef uint2float
|
||||
#endif
|
||||
|
||||
/**
 * Exercise the pico_float integer and fixed-point conversion functions.
 *
 * Every check compares a conversion result against a hard-coded expected value
 * using the test_check* macros; a mismatch prints the check's label and invokes
 * stop(). The fixed-point and round-to-negative-infinity groups are compiled
 * only when neither LIB_PICO_FLOAT_COMPILER nor __riscv is set; the int<->float
 * and round-towards-zero (_z) groups always run.
 *
 * \return 0 if every check passed, non-zero otherwise (1 with the default
 *         stop(), -1 if stop() is switched to its early-return variant).
 */
int test() {
    int rc = 0;
#if LIB_PICO_FLOAT_PICO_DCP
    printf(">>> Using DCP\n");
#endif
#if LIB_PICO_FLOAT_PICO_VFP
    printf(">>> Using VFP\n");
#endif
    printf("int2float\n");
    test_checkf(int2float(0), 0.0f, "int2float1");
    test_checkf(int2float(-1), -1.0f, "int2float2");
    test_checkf(int2float(1), 1.0f, "int2float3");
    test_checkf(int2float(INT32_MAX), 2147483647.0f, "int2float4");
    test_checkf(int2float(INT32_MIN), -2147483648.0f, "int2float5");
    // check rounding
    test_checkf(int2float(2147483391), 2147483392.0f, "int2float6");
    test_checkf(int2float(2147483456), 2147483392.0f, "int2float7");
    test_checkf(int2float(2147483457), 2147483520.0f, "int2float8");
    test_checkf(int2float(2147483483), 2147483520.0f, "int2float9");
    test_checkf(int2float(2147483584), 2147483648.0f, "int2float10");

    printf("uint2float\n");
    test_checkf(uint2float(0), 0.0f, "uint2float1");
    test_checkf(uint2float(1), 1.0f, "uint2float2");
    test_checkf(uint2float(INT32_MAX), 2147483647.0f, "uint2float3");
    // todo test correct rounding around maximum precision
    test_checkf(uint2float(UINT32_MAX), 4294967295.0f, "uint2float4");

    printf("int642float\n");
    test_checkf(int642float(0), 0.0f, "int642float1");
    test_checkf(int642float(-1), -1.0f, "int642float2");
    test_checkf(int642float(1), 1.0f, "int642float3");
    test_checkf(int642float(INT32_MAX-1), 2147483646.0f, "int642float4"); // note equality is within 1ulp
    test_checkf(int642float(INT32_MAX), 2147483647.0f, "int642float5"); // note equality is within 1ulp
    test_checkf(int642float(INT32_MAX+1ll), 2147483648.0f, "int642float6");
    test_checkf(int642float(INT32_MIN-1ll), -2147483649.0f, "int642float7"); // note equality is within 1ulp
    test_checkf(int642float(INT32_MIN), -2147483648.0f, "int642float8");
    test_checkf(int642float(INT32_MIN+1ll), -2147483647.0f, "int642float9"); // note equality is within 1ulp
    // todo test correct rounding around maximum precision
    test_checkf(int642float(INT64_MAX), 9223372036854775807.0f, "int642float10");
    test_checkf(int642float(INT64_MIN), -9223372036854775808.0f, "int642float11");

    printf("uint642float\n");
    test_checkf(uint642float(0), 0.0f, "uint642float1");
    test_checkf(uint642float(1), 1.0f, "uint642float2");
    test_checkf(uint642float(INT32_MAX-1), 2147483646.0f, "uint642float3"); // note equality is within 1ulp
    test_checkf(uint642float(INT32_MAX), 2147483647.0f, "uint642float4"); // note equality is within 1ulp
    test_checkf(uint642float(INT32_MAX+1ll), 2147483648.0f, "uint642float5");
    test_checkf(uint642float(INT64_MAX), 9223372036854775807.0f, "uint642float6");
    // todo test correct rounding around maximum precision
    test_checkf(uint642float(UINT64_MAX), 18446744073709551615.0f, "uint642float7");

    // Lets the checks below build floats from raw bit patterns.
    union {
        uint32_t u;
        float f;
    } u32f;

#if !(LIB_PICO_FLOAT_COMPILER || defined(__riscv))
    printf("fix2float\n");
    // todo test correct rounding around maximum precision
    test_checkf(fix2float(-3, 1), -1.5f, "fix2float1");
    test_checkf(fix2float(-3, 1), -1.5f, "fix2float2");
    test_checkf(fix2float(-3, -4), -48.0f, "fix2float3");

    printf("ufix2float\n");
    // todo test correct rounding around maximum precision
    test_checkf(ufix2float(0xa0000000, 30), 2.5f, "ufix2float1");
    test_checkf(ufix2float(3, -4), 48.0f, "ufix2float2");

    printf("fix642float\n");
    // todo test correct rounding around maximum precision
    test_checkf(fix642float(-0xa000000000ll, 38), -2.5f, "fix642float1");
    test_checkf(fix642float(-3, -34), -51539607552.0f, "fix642float2");

    printf("ufix642float\n");
    // todo test correct rounding around maximum precision
    test_checkf(ufix642float(0xa000000000ll, 38), 2.5f, "ufix642float1");
    test_checkf(ufix642float(3, -34), 51539607552.0f, "ufix642float2");

    test_checkf(fix2float_8(128), 0.5f, "fix2float_8_1");
    test_checkf(fix2float_8(-128), -0.5f, "fix2float_8_2");
    test_checkf(fix2float_16(8192), 0.125f, "fix2float_8_3");
    test_checkf(fix2float_16(-8192), -0.125f, "fix2float_8_4");
    test_checkf(fix2float_24(3<<23), 1.5f, "fix2float_8_5");
    test_checkf(fix2float_24(-(3<<23)), -1.5f, "fix2float_8_6");

    printf("float2fix\n");
    test_checki(float2fix(-0.5f, 8), -0x80, "float2fix0");
    test_checki(float2fix(3.5f, 8), 0x380, "float2fix1");
    test_checki(float2fix(-3.5f, 8), -0x380, "float2fix2");
    test_checki(float2fix(32768.0f, 16), INT32_MAX, "float2fix3");
    test_checki(float2fix(65536.0f, 16), INT32_MAX, "float2fix4");
    test_checki(float2fix(-65536.0f, 16), INT32_MIN, "float2fix4b");
    test_checki(float2fix(INFINITY, 16), INT32_MAX, "float2fix5");
    test_checki(float2fix(-INFINITY, 16), INT32_MIN, "float2fix5b");
    test_checki(float2fix(3.24999f, 2), 12, "float2fix6");
    test_checki(float2fix(3.25f, 2), 13, "float2fix7");
    test_checki(float2fix(-3.24999f, 2), -13, "float2fix8");
    test_checki(float2fix(-3.25f, 2), -13, "float2fix9");
    test_checki(float2fix(-0.75f, 1), -2, "float2fix10");
    test_checki(float2fix(-3.0f, -1), -2, "float2fix11"); // not very useful
    u32f.u = 0x7f012345;
    test_checki(float2fix(u32f.f, 1), INT32_MAX, "float2fix12");
    u32f.u = 0xff012345;
    test_checki(float2fix(u32f.f, 1), INT32_MIN, "float2fix13");

    printf("float2ufix\n");
    test_checku(float2ufix(3.5f, 8), 0x380, "float2ufix1");
    test_checku(float2ufix(-3.5f, 8), 0, "float2ufix2");
    // unsigned literal: 32768 << 16 does not fit in a signed 32-bit int
    test_checku(float2ufix(32768.0f, 16), 32768u << 16, "float2ufix3");
    test_checku(float2ufix(65536.0f, 16), UINT32_MAX, "float2ufix4");
    test_checku(float2ufix(INFINITY, 16), UINT32_MAX, "float2ufix5");
    test_checku(float2ufix(3.24999f, 2), 12, "float2ufix6");
    test_checku(float2ufix(3.25f, 2), 13, "float2ufix7");
    test_checku(float2ufix(3.0f, -1), 1, "float2ufix8"); // not very useful

    printf("float2fix64\n");
    test_checki64(float2fix64(3.5f, 8), 0x380, "float2fix641");
    test_checki64(float2fix64(-3.5f, 8), -0x380, "float2fix642");
    test_checki64(float2fix64(32768.0f, 16), 32768ll << 16, "float2fix643");
    test_checki64(float2fix64(65536.0f, 16), 65536ll << 16, "float2fix644");
    test_checki64(float2fix64(2147483648.0f, 16), 2147483648ll << 16, "float2fix644b");
    test_checki64(float2fix64(65536.0f * 65536.0f * 32768.0f, 16), INT64_MAX, "float2fix644c");
    test_checki64(float2fix64(INFINITY, 16), INT64_MAX, "float2fix645");
    test_checki64(float2fix64(3.24999f, 2), 12, "float2fix646");
    test_checki64(float2fix64(3.25f, 2), 13, "float2fix647");
    test_checki64(float2fix64(-3.24999f, 2), -13, "float2fix648");
    test_checki64(float2fix64(-3.25f, 2), -13, "float2fix649");
    test_checki64(float2fix64(-3.0f, -1), -2, "float2fix6410"); // not very useful

    printf("float2ufix64\n");
    test_checku64(float2ufix64(3.5f, 8), 0x380, "float2ufix641");
    test_checku64(float2ufix64(-3.5f, 8), 0, "float2ufix642");
    test_checku64(float2ufix64(32768.0f, 16), 32768ull << 16, "float2ufix643");
    test_checku64(float2ufix64(65536.0f, 16), 65536ull << 16, "float2ufix644");
    test_checku64(float2ufix64(2147483648.0f, 16), 2147483648ull << 16, "float2ufix644b");
    test_checku64(float2ufix64(INFINITY, 16), UINT64_MAX, "float2ufix645");
    test_checku64(float2ufix64(3.24999f, 2), 12, "float2ufix646");
    test_checku64(float2ufix64(3.25f, 2), 13, "float2ufix647");
    test_checku64(float2ufix64(3.0f, -1), 1, "float2ufix648"); // not very useful

    printf("float2fix_z\n");
    test_checki(float2fix_z(3.5f, 8), 0x380, "float2fix_z1");
    test_checki(float2fix_z(-3.5f, 8), -0x380, "float2fix_z2");
    test_checki(float2fix_z(32768.0f, 16), INT32_MAX, "float2fix_z3");
    test_checki(float2fix_z(65536.0f, 16), INT32_MAX, "float2fix_z4");
    test_checki(float2fix_z(INFINITY, 16), INT32_MAX, "float2fix_z5");
    test_checki(float2fix_z(-INFINITY, 16), INT32_MIN, "float2fix_z5b");
    test_checki(float2fix_z(3.24999f, 2), 12, "float2fix_z6");
    test_checki(float2fix_z(3.25f, 2), 13, "float2fix_z7");
    test_checki(float2fix_z(-3.24999f, 2), -12, "float2fix_z8");
    test_checki(float2fix_z(-3.25f, 2), -13, "float2fix_z9");
    test_checki(float2fix_z(-0.75f, 1), -1, "float2fix_z10");
    test_checki(float2fix_z(-3.0f, -1), -1, "float2fix_z11"); // not very useful
    u32f.u = 0x7f012345;
    test_checki(float2fix_z(u32f.f, 1), INT32_MAX, "float2fix_z12");
    u32f.u = 0xff012345;
    test_checki(float2fix_z(u32f.f, 1), INT32_MIN, "float2fix_z13");

    printf("float2ufix_z\n");
    test_checku(float2ufix_z(3.5f, 8), 0x380, "float2ufix_z1");
    test_checku(float2ufix_z(-3.5f, 8), 0, "float2ufix_z2");
    // unsigned literal: 32768 << 16 does not fit in a signed 32-bit int
    test_checku(float2ufix_z(32768.0f, 16), 32768u << 16, "float2ufix_z3");
    test_checku(float2ufix_z(65536.0f, 16), UINT32_MAX, "float2ufix_z4");
    test_checku(float2ufix_z(INFINITY, 16), UINT32_MAX, "float2ufix_z5");
    test_checku(float2ufix_z(3.24999f, 2), 12, "float2ufix_z6");
    test_checku(float2ufix_z(3.25f, 2), 13, "float2ufix_z7");
    test_checku(float2ufix_z(3.0f, -1), 1, "float2ufix_z8"); // not very useful
    u32f.u = 0x7f012345;
    test_checku(float2ufix_z(u32f.f, 1), UINT32_MAX, "float2ufix_z9");
    u32f.u = 0xff012345;
    test_checku(float2ufix_z(u32f.f, 1), 0, "float2ufix_z10");

    printf("float2fix64_z\n");
    test_checki64(float2fix64_z(3.5f, 8), 0x380, "float2fix64_z1");
    test_checki64(float2fix64_z(-3.5f, 8), -0x380, "float2fix64_z2");
    test_checki64(float2fix64_z(32768.0f, 16), 32768ll << 16, "float2fix64_z3");
    test_checki64(float2fix64_z(65536.0f, 16), 65536ll << 16, "float2fix64_z4");
    test_checki64(float2fix64_z(65536.0f * 65536.0f * 32768.0f, 16), INT64_MAX, "float2fix64_z4b");
    test_checki64(float2fix64_z(INFINITY, 16), INT64_MAX, "float2fix64_z5");
    test_checki64(float2fix64_z(3.24999f, 2), 12, "float2fix64_z6");
    test_checki64(float2fix64_z(3.25f, 2), 13, "float2fix64_z7");
    test_checki64(float2fix64_z(-3.24999f, 2), -12, "float2fix64_z8");
    test_checki64(float2fix64_z(-3.25f, 2), -13, "float2fix64_z9");
    test_checki64(float2fix64_z(-3.0f, -1), -1, "float2fix64_z10"); // not very useful

    printf("float2ufix64_z\n");
    test_checku64(float2ufix64_z(3.5f, 8), 0x380, "float2ufix64_z1");
    test_checku64(float2ufix64_z(-3.5f, 8), 0, "float2ufix64_z2");
    test_checku64(float2ufix64_z(32768.0f, 16), 32768ll << 16, "float2ufix64_z3");
    test_checku64(float2ufix64_z(65536.0f, 16), 65536ll << 16, "float2ufix64_z4");
    // unsigned results use the unsigned checker so a failure prints UINT64_MAX, not -1
    test_checku64(float2ufix64_z(65536.0f * 65536.0f * 65536.0f, 16), UINT64_MAX, "float2ufix64_z4b");
    test_checku64(float2ufix64_z(INFINITY, 16), UINT64_MAX, "float2ufix64_z5");
    test_checku64(float2ufix64_z(3.24999f, 2), 12, "float2ufix64_z6");
    test_checku64(float2ufix64_z(3.25f, 2), 13, "float2ufix64_z7");
    test_checku64(float2ufix64_z(3.0f, -1), 1, "float2ufix64_z8"); // not very useful

    printf("float2int\n");
    test_checki(float2int(0.0f), 0, "float2int1");
    test_checki(float2int(0.25f), 0, "float2int1b");
    test_checki(float2int(0.5f), 0, "float2int2");
    test_checki(float2int(0.75f), 0, "float2int2b");
    test_checki(float2int(1.0f), 1, "float2int3");
    test_checki(float2int(-10.0f), -10, "float2int3a");
    test_checki(float2int(-0.0f), 0, "float2int3b");
    test_checki(float2int(-0.25f), -1, "float2int4");
    test_checki(float2int(-0.5f), -1, "float2int4b");
    test_checki(float2int(-0.75f), -1, "float2int5");
    test_checki(float2int(-1.0f), -1, "float2int5b");
    // todo test correct rounding around maximum precision
    test_checki(float2int(2147483647.0f), INT32_MAX, "float2int6");
    test_checki(float2int(21474836470.0f), INT32_MAX, "float2int7");
    test_checki(float2int(-2147483648.0f), INT32_MIN, "float2int8");
    test_checki(float2int(-21474836480.0f), INT32_MIN, "float2int9");
    test_checki(float2int(-2.5f), -3, "float2int10");
    test_checki(float2int(-2.4f), -3, "float2int11");

    printf("float2uint\n");
    test_checku(float2uint(0.0f), 0, "float2uint1");
    test_checku(float2uint(0.25f), 0, "float2uint2");
    test_checku(float2uint(0.5f), 0, "float2uint3");
    test_checku(float2uint(0.75f), 0, "float2uint4");
    test_checku(float2uint(1.0f), 1, "float2uint5");
    test_checku(float2uint(2147483647.0f), INT32_MAX+1u, "float2uint6"); // note loss of precision
    test_checku(float2uint(2147483648.0f), INT32_MAX+1u, "float2uint7");
    test_checku(float2uint(4294967294.5f), UINT32_MAX, "float2uint8"); // note loss of precision
    test_checku(float2uint(4294967295.0f), UINT32_MAX, "float2uint9");
    test_checku(float2uint(42949672950.0f), UINT32_MAX, "float2uint10");

    printf("float2int64\n");
    test_checki64(float2int64(0.0f), 0, "float2int641");
    test_checki64(float2int64(0.25f), 0, "float2int641b");
    test_checki64(float2int64(0.5f), 0, "float2int642");
    test_checki64(float2int64(0.75f), 0, "float2int642b");
    test_checki64(float2int64(1.0f), 1, "float2int643");
    test_checki64(float2int64(-10.0f), -10, "float2int643a");
    test_checki64(float2int64(-0.0f), 0, "float2int643b");
    test_checki64(float2int64(-0.25f), -1, "float2int644");
    test_checki64(float2int64(-0.5f), -1, "float2int644b");
    test_checki64(float2int64(-0.75f), -1, "float2int645");
    test_checki64(float2int64(-1.0f), -1, "float2int645b");
    // todo test correct rounding around maximum precision
    test_checki64(float2int64(2147483647.0f), INT32_MAX+1ll, "float2int646");
    test_checki64(float2int64(21474836470.0f), 21474836480ll, "float2int647"); // note loss of precision
    test_checki64(float2int64(-2147483648.0f), INT32_MIN, "float2int648");
    test_checki64(float2int64(-21474836480.0f), -21474836480ll, "float2int649");
    test_checki64(float2int64(-2.5f), -3, "float2int6410");
    test_checki64(float2int64(-2.4f), -3, "float2int6411");

    printf("float2uint64\n");
    test_checku64(float2uint64(0.0f), 0, "float2uint641");
    test_checku64(float2uint64(0.25f), 0, "float2uint642");
    test_checku64(float2uint64(0.5f), 0, "float2uint643");
    test_checku64(float2uint64(0.75f), 0, "float2uint644");
    test_checku64(float2uint64(1.0f), 1, "float2uint645");
    test_checku64(float2uint64(2147483647.0f), INT32_MAX+1u, "float2uint646"); // note loss of precision
    test_checku64(float2uint64(2147483648.0f), INT32_MAX+1u, "float2uint647");
    test_checku64(float2uint64(4294967294.5f), 4294967296ull, "float2uint648"); // note loss of precision
    test_checku64(float2uint64(4294967295.0f), 4294967296ull, "float2uint649"); // note loss of precision
    test_checku64(float2uint64(42949672950.0f), 42949672960ull, "float2uint6410"); // note loss of precision
#endif

    // // These methods round towards 0.
    printf("float2int_z\n");
    test_checki(float2int_z(0.0f), 0, "float2int_z1");
    test_checki(float2int_z(0.25f), 0, "float2int_z1b");
    test_checki(float2int_z(0.5f), 0, "float2int_z2");
    test_checki(float2int_z(0.75f), 0, "float2int_z2b");
    test_checki(float2int_z(1.0f), 1, "float2int_z3");
    test_checki(float2int_z(-10.0f), -10, "float2int_z3a");
    test_checki(float2int_z(-0.0f), 0, "float2int_z3b");
    test_checki(float2int_z(-0.25f), 0, "float2int_z4");
    test_checki(float2int_z(-0.5f), 0, "float2int_z4b");
    test_checki(float2int_z(-0.75f), 0, "float2int_z5");
    test_checki(float2int_z(-1.0f), -1, "float2int_z5b");
    // todo test correct rounding around maximum precision
    test_checki(float2int_z(2147483647.0f), INT32_MAX, "float2int_z6");
    test_checki(float2int_z(21474836470.0f), INT32_MAX, "float2int_z7");
    test_checki(float2int_z(-2147483648.0f), INT32_MIN, "float2int_z8");
    test_checki(float2int_z(-21474836480.0f), INT32_MIN, "float2int_z9");
    test_checki(float2int_z(-2.5f), -2, "float2int_z10");
    test_checki(float2int_z(-2.4f), -2, "float2int_z11");

    printf("float2int64_z\n");
    test_checki64(float2int64_z(0.0f), 0, "float2int64_z1");
    test_checki64(float2int64_z(0.25f), 0, "float2int64_z1b");
    test_checki64(float2int64_z(0.5f), 0, "float2int64_z2");
    test_checki64(float2int64_z(0.75f), 0, "float2int64_z2b");
    test_checki64(float2int64_z(1.0f), 1, "float2int64_z3");
    test_checki64(float2int64_z(-10.0f), -10, "float2int64_z3a");
    test_checki64(float2int64_z(-0.0f), 0, "float2int64_z3b");
    test_checki64(float2int64_z(-0.25f), 0, "float2int64_z4");
    test_checki64(float2int64_z(-0.5f), 0, "float2int64_z4b");
    test_checki64(float2int64_z(-0.75f), 0, "float2int64_z5");
    test_checki64(float2int64_z(-1.0f), -1, "float2int64_z5b");
    test_checki64(float2int64_z(2147483647.0f), 2147483648ll, "float2int64_z6"); // note loss of precision
    test_checki64(float2int64_z(21474836470.0f), 21474836480ll, "float2int64_z7"); // note loss of precision
    test_checki64(float2int64_z(-2147483648.0f), INT32_MIN, "float2int64_z8");
    test_checki64(float2int64_z(-21474836480.0f), -21474836480ll, "float2int64_z9");
    test_checki64(float2int64_z(-2.5f), -2, "float2int64_z10");
    test_checki64(float2int64_z(-2.4f), -2, "float2int64_z11");

    printf("float2uint_z\n");
    test_checku(float2uint_z(0.0f), 0, "float2uint_z1");
    test_checku(float2uint_z(0.25f), 0, "float2uint_z2");
    test_checku(float2uint_z(0.5f), 0, "float2uint_z3");
    test_checku(float2uint_z(0.75f), 0, "float2uint_z4");
    test_checku(float2uint_z(1.0f), 1, "float2uint_z5");
    test_checku(float2uint_z(2147483647.0f), INT32_MAX+1u, "float2uint_z6"); // note loss of precision
    test_checku(float2uint_z(2147483648.0f), INT32_MAX+1u, "float2uint_z7");
    // todo test correct rounding around maximum precision
    test_checku(float2uint_z(4294967294.5f), UINT32_MAX, "float2uint_z8"); // note loss of precision
    test_checku(float2uint_z(4294967295.0f), UINT32_MAX, "float2uint_z9");
    test_checku(float2uint_z(42949672950.0f), UINT32_MAX, "float2uint_z10");

    printf("float2uint64_z\n");
    test_checku64(float2uint64_z(0.0f), 0, "float2uint64_z1");
    test_checku64(float2uint64_z(0.25f), 0, "float2uint64_z2");
    test_checku64(float2uint64_z(0.5f), 0, "float2uint64_z3");
    test_checku64(float2uint64_z(0.75f), 0, "float2uint64_z4");
    test_checku64(float2uint64_z(1.0f), 1, "float2uint64_z5");
    test_checku64(float2uint64_z(2147483647.0f), INT32_MAX+1u, "float2uint64_z6"); // note loss of precision
    test_checku64(float2uint64_z(2147483648.0f), INT32_MAX+1u, "float2uint64_z7");
    test_checku64(float2uint64_z(4294967294.5f), 4294967296ull, "float2uint64_z8"); // note loss of precision
    test_checku64(float2uint64_z(4294967295.0f), 4294967296ull, "float2uint64_z9"); // note loss of precision
    test_checku64(float2uint64_z(42949672950.0f), 42949672960ull, "float2uint64_z10"); // note loss of precision

    // float exp10f(float x);
    // void sincosf(float x, float *sinx, float *cosx);
    // float powintf(float x, int y);
    return rc;
}
|
||||
|
||||
/**
 * Entry point for the single-precision conversion test image.
 *
 * Initialises stdio, runs test(), and prints a one-line verdict:
 * "PASSED" when test() returned zero, "FAILED" otherwise.
 */
int main() {
    stdio_init_all();

    if (test() == 0) {
        printf("PASSED\n");
    } else {
        printf("FAILED\n");
    }
    return 0;
}
|
Reference in New Issue
Block a user