1
0
mirror of https://github.com/raspberrypi/pico-sdk.git synced 2025-08-06 06:02:39 +03:00

rationalize pico_float/pico_double libraries (#2208)

* on RP2350 _dcp variant now enables -msoft-float, since if you're using this at all it is likely because you don't want to use the VFP unit at all (to save stack space)
* implement all float_ and double_ conversion functions in all pico_float_pico_ variants and pico_double_pico on RP2040 and RP2350 (many were missing in some combinations)
* provide better granularity of what functions are wrapped in each case

also marked custom_xxx_funcs_test.c as not in bazel build yet
This commit is contained in:
Graham Sanderson
2025-02-04 16:19:17 -06:00
committed by GitHub
parent 7d450bf097
commit e85c3e5515
17 changed files with 2012 additions and 142 deletions

View File

@@ -535,7 +535,7 @@ static inline void dma_channel_start(uint channel) {
*\endcode *\endcode
* *
* \if rp2350_specific * \if rp2350_specific
* RP2350 only: Due to errata RP12350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of * RP2350 only: Due to errata RP2350-E5 (see the RP2350 datasheet for further detail), it is necessary to clear the enable bit of
* the aborted channel and any chained channels prior to the abort to prevent re-triggering. * the aborted channel and any chained channels prior to the abort to prevent re-triggering.
* \endif * \endif
* *

View File

@@ -7,7 +7,7 @@
#include "pico/asm_helper.S" #include "pico/asm_helper.S"
#if !HAS_DOUBLE_COPROCESSOR #if !HAS_DOUBLE_COPROCESSOR
#error attempt to compile double_aeabi_rp2350 when there is no DCP #error attempt to compile double_aeabi_dcp when there is no DCP
#else #else
#include "hardware/dcp_instr.inc.S" #include "hardware/dcp_instr.inc.S"
@@ -29,7 +29,7 @@ double_section WRAPPER_FUNC_NAME(\func)
// ============== STATE SAVE AND RESTORE =============== // ============== STATE SAVE AND RESTORE ===============
.macro saving_func type func .macro saving_func type func, opt_label1='-', opt_label2='-'
// Note we are usually 32-bit aligned already at this point, as most of the // Note we are usually 32-bit aligned already at this point, as most of the
// function bodies contain exactly two 16-bit instructions: bmi and bx lr. // function bodies contain exactly two 16-bit instructions: bmi and bx lr.
// We want the PCMP word-aligned. // We want the PCMP word-aligned.
@@ -41,6 +41,12 @@ double_section WRAPPER_FUNC_NAME(\func)
push {lr} // 16-bit instruction push {lr} // 16-bit instruction
bl generic_save_state // 32-bit instruction bl generic_save_state // 32-bit instruction
b 1f // 16-bit instruction b 1f // 16-bit instruction
.ifnc \opt_label1,'-'
regular_func \opt_label1
.endif
.ifnc \opt_label2,'-'
regular_func \opt_label2
.endif
// This is the actual entry point: // This is the actual entry point:
\type\()_func \func \type\()_func \func
PCMP apsr_nzcv PCMP apsr_nzcv
@@ -128,53 +134,124 @@ saving_func wrapper sqrt
dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12 dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
saving_func_return saving_func_return
// todo not a real thing double_section dclassify
double_wrapper_section __aeabi_dclassify saving_func regular dclassify
saving_func wrapper __aeabi_dclassify
@ with correct rounding
dcp_dclassify_m apsr_nzcv,r0,r1 dcp_dclassify_m apsr_nzcv,r0,r1
saving_func_return saving_func_return
// ============== CONVERSION FUNCTIONS =============== // ============== CONVERSION FUNCTIONS ===============
double_wrapper_section __aeabi_d2f double_wrapper_section __aeabi_d2f
saving_func wrapper __aeabi_d2f saving_func wrapper __aeabi_d2f double2float
@ with rounding @ with rounding
dcp_double2float_m r0,r0,r1 dcp_double2float_m r0,r0,r1
saving_func_return saving_func_return
double_wrapper_section __aeabi_i2d double_wrapper_section __aeabi_i2d
saving_func wrapper __aeabi_i2d saving_func wrapper __aeabi_i2d int2double
dcp_int2double_m r0,r1,r0 dcp_int2double_m r0,r1,r0
saving_func_return saving_func_return
double_wrapper_section __aeabi_ui2d double_wrapper_section __aeabi_ui2d
saving_func wrapper __aeabi_ui2d saving_func wrapper __aeabi_ui2d uint2double
dcp_uint2double_m r0,r1,r0 dcp_uint2double_m r0,r1,r0
saving_func_return saving_func_return
double_section double2fix_z
saving_func regular double2fix_z
ubfx r3, r1, #20, #11
adds r3, r2
beq 1f // very small; we don't care that we might make a denormal
asrs ip, r3, #11
beq 1f
ite pl
movpl r3, #0x7ff
movsmi r3, #0
1:
bfi r1, r3, #20, #11
b double2int_z_entry
double_section double2ufix
saving_func regular double2ufix_z double2ufix
double2ufix_z_entry:
ubfx r3, r1, #20, #11
adds r3, r2
beq 1f // very small; we don't care that we might make a denormal
asrs ip, r3, #11
beq 1f
ite pl
lsrspl r3, r1, #20 // 0x7ff
movsmi r3, #0
1:
bfi r1, r3, #20, #11
b double2uint_z_entry
double_section double2fix
saving_func regular double2fix
ubfx r3, r1, #20, #11
cbz r3, 2f // 0 or denormal
adds r3, r2
beq 1f // very small; we don't care that we might make a denormal
asrs ip, r3, #11
beq 1f
ite pl
movpl r3, #0x7ff
movsmi r3, #0
1:
bfi r1, r3, #20, #11
b double2int_entry
2:
movs r0, #0
saving_func_return
// double2int: convert a double to a signed 32-bit integer, rounding towards
// -infinity. Inputs for which truncation towards zero already gives that answer
// are handed to double2int_z_entry; only negative non-integers are truncated
// here and then decremented by one.
// NOTE(review): assumes the double arrives in r0 (low word) : r1 (high word), as
// implied by the operands of dcp_double2int_m below - confirm against callers.
double_section double2int
saving_func regular double2int
double2int_entry:
lsls r2, r1, #1 // carry = sign bit; r2 = exponent and upper mantissa bits
bcc double2int_z_entry // positive is ok for int_z
lsrs r3, r2, #21 // r3 = 11-bit biased exponent
beq double2int_z_entry // 0 or -0 or denormal is ok for int_z
lsrs r2, #21
adds r2, #1
subs r2, r2, #0x400 // r2 = biased exponent - 0x3ff
bcc 1f // <1 means subtract 1
cmp r2, #31
bge double2int_z_entry // must be an integer or maxed out
lsls r3, r1, #12
adds r3, r3, r0, lsr #20 // r3 now has highest 32 mantissa bits
lsls r3, r2 // shift out the bits that belong to the integer part
orrs r3, r3, r0, lsl #12 // these bits are all guaranteed to be in the fraction
beq double2int_z_entry // integer
1:
// negative non-integer: truncate towards zero, then step down by one
dcp_double2int_m r0,r0,r1
subs r0, #1
saving_func_return
double_wrapper_section __aeabi_d2iz double_wrapper_section __aeabi_d2iz
saving_func wrapper __aeabi_d2iz saving_func wrapper __aeabi_d2iz double2int_z
double2int_z_entry:
@ with truncation towards 0 @ with truncation towards 0
dcp_double2int_m r0,r0,r1 dcp_double2int_m r0,r0,r1
// note: this works with either saved or not saved call as it is just a `bx lr`
saving_func_return saving_func_return
double_wrapper_section __aeabi_d2uiz double_wrapper_section __aeabi_d2uiz
saving_func wrapper __aeabi_d2uiz saving_func wrapper __aeabi_d2uiz double2uint double2uint_z
double2uint_z_entry:
@ with truncation towards 0 @ with truncation towards 0
dcp_double2uint_m r0,r0,r1 dcp_double2uint_m r0,r0,r1
saving_func_return saving_func_return
// todo not a real thing double_section double2int_r
double_wrapper_section __aeabi_d2i_r saving_func regular double2int_r
saving_func wrapper __aeabi_d2i_r
@ with rounding @ with rounding
dcp_double2int_r_m r0,r0,r1 dcp_double2int_r_m r0,r0,r1
saving_func_return saving_func_return
// todo not a real thing double_section double2uint_r
double_wrapper_section __aeabi_d2ui_r saving_func regular double2uint_r
saving_func wrapper __aeabi_d2ui_r
@ with rounding @ with rounding
dcp_double2uint_r_m r0,r0,r1 dcp_double2uint_r_m r0,r0,r1
saving_func_return saving_func_return
@@ -189,7 +266,6 @@ saving_func wrapper __aeabi_dcmpun
saving_func_return saving_func_return
double_wrapper_section __aeabi_dcmp double_wrapper_section __aeabi_dcmp
saving_func wrapper __aeabi_cdrcmple saving_func wrapper __aeabi_cdrcmple
dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
bvs cmp_nan bvs cmp_nan

View File

@@ -425,6 +425,7 @@ double_wrapper_section __aeabi_ui2d
double_wrapper_section __aeabi_i2d double_wrapper_section __aeabi_i2d
wrapper_func __aeabi_ui2d wrapper_func __aeabi_ui2d
regular_func uint2double
movs r1, #0 movs r1, #0
cmp r0, #0 cmp r0, #0
bne 2f bne 2f
@@ -432,6 +433,7 @@ wrapper_func __aeabi_ui2d
bx lr bx lr
// double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion // double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion
wrapper_func __aeabi_i2d wrapper_func __aeabi_i2d
regular_func int2double
asrs r1, r0, #31 asrs r1, r0, #31
eors r0, r1 eors r0, r1
subs r0, r1 subs r0, r1
@@ -506,6 +508,7 @@ regular_func double2int
// unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3] // unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3]
double_wrapper_section __aeabi_d2uiz double_wrapper_section __aeabi_d2uiz
wrapper_func __aeabi_d2uiz wrapper_func __aeabi_d2uiz
regular_func double2uint_z
regular_func double2uint regular_func double2uint
shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim
@@ -528,11 +531,13 @@ regular_func ufix642double
// double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion // double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion
double_wrapper_section __aeabi_l2d double_wrapper_section __aeabi_l2d
wrapper_func __aeabi_l2d wrapper_func __aeabi_l2d
regular_func int642double
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim
// double FUNC_NAME(__aeabi_l2f)(long long) long long to double (double precision) conversion // double FUNC_NAME(__aeabi_l2f)(long long) long long to double (double precision) conversion
double_wrapper_section __aeabi_ul2d double_wrapper_section __aeabi_ul2d
wrapper_func __aeabi_ul2d wrapper_func __aeabi_ul2d
regular_func uint642double
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim
// long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3] // long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3]
@@ -566,22 +571,106 @@ regular_func double2int64
// unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3] // unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3]
double_wrapper_section __aeabi_d2ulz double_wrapper_section __aeabi_d2ulz
wrapper_func __aeabi_d2ulz wrapper_func __aeabi_d2ulz
regular_func double2uint64
regular_func double2uint64_z
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim
double_section double2fix64_z
regular_func double2fix64_z
lsls r3, r1, #1
bcc double2fix64 // input positive is ok for fix64
mov ip, r2
asrs r2, r3, #21
beq 3f // input zero or denormal, so just return zero
adds r2, #1
beq double2fix64 // input infinite/nan is ok for fix64
lsrs r3, #21
add r3, ip
movs r2, #1
negs r2, r2
lsrs r2, #22
subs r3, r2 // r3 = modified e - 0x3ff
bcc 3f // modified input < 1.0 means result is zero
cmp r3, #52
bge 2f // modified input must be an integer or infinite
adds r3, #12
mov r2, r1
lsls r2, r2, r3 // r2 has remaining fractional mantissa bits of r1
bne 1f // not integer as non zero fractional bits remain
subs r3, #32
asrs r2, r3, #31
bics r3, r3, r2
movs r2, r0
lsls r2, r2, r3
bne 1f // remaining fractional bits are non-zero, so argument was not an integer
2:
// integer
mov r2, ip
b double2fix64
3: // result is zero
movs r0, #0
movs r1, #0
bx lr
1:
push {lr}
mov r2, ip
bl double2fix64
movs r2, #0
adds r0, #1
adcs r1, r2
pop {pc}
double_section double2fix64 double_section double2fix64
regular_func double2fix64 regular_func double2fix64
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim
double_section double2ufix64 double_section double2ufix64
regular_func double2ufix64 regular_func double2ufix64
regular_func double2ufix64_z
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim
double_section double2fix double_section double2fix
regular_func double2fix regular_func double2fix
shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim
// double2fix_z: convert a double to a signed 32-bit fixed point value, rounding
// towards zero. The exponent adjustment in r2 is added to the input's exponent
// field and the result is handed to double2int_z; out-of-range cases are
// resolved here (zero for tiny inputs, saturation for huge/infinite/NaN inputs).
// NOTE(review): assumes r0:r1 hold the double (r1 = high word) and r2 holds the
// number of fractional bits `e` - confirm against the C prototype/ABI.
double_section double2fix_z
regular_func double2fix_z
lsls r3, r1, #1 // drop the sign bit
asrs r3, #21 // r3 = exponent field, sign-extended (all ones -> -1)
beq 2f // input is zero or denormal
adds r3, #1
beq 3f // input is infinite or nan
// extract exponent again
lsls r3, r1, #1
lsrs r3, #21
// adjust
adds r3, r2
ble 2f // adjusted input is zero or denormal (adjusted exponent <= 0)
lsrs r3, r3, #11
bne 3f // adjusted exponent no longer fits in 11 bits: saturate
lsls r2, r2, #20 // align exponent adjustment offset
adds r1, r1, r2 // we know adjustment is safe
b double2int_z
2:
// result is zero
movs r0, #0
bx lr
3:
// saturate: r0 = 0x7fffffff for positive input, 0x80000000 for negative input
movs r0, #0
subs r0, #1
lsrs r0, #1
asrs r1, #31 // r1 = 0 or 0xffffffff according to the sign of the input
eors r0, r1
bx lr
double_section double2ufix double_section double2ufix
regular_func double2ufix regular_func double2ufix
regular_func double2ufix_z
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim
double_wrapper_section __aeabi_d2f double_wrapper_section __aeabi_d2f

View File

@@ -249,7 +249,69 @@ regular_func ufix2double
movs r1,#0 movs r1,#0
bx r14 bx r14
double_wrapper_section conv_dtoi64 double_section conv_dtoi64
regular_func double2int64
lsls r3, r1, #1
bcc double2int64_z // input positive is ok for int64_z
cmp r3, #0xffe00000
bcs double2int64_z // input is infinite
lsrs r3, #21
beq 2f // input zero or denormal, means answer remains zero
sub r3, #0x3ff
cmp r3, #0
blt 1f // input is less than 1.0
cmp r3, #52
bge double2int64_z // modified input must be an integer or infinite
adds r3, #12
lsls r2, r1, r3 // r2 has remaining fractional mantissa bits of r1
bne 1f // not integer as non zero fractional bits remain
subs r3, #32
bics r3, r3, r3, asr #31 // map negative shift to zero
lsls r3, r0, r3
beq double2int64_z // remaining fractional bits are 0, so argument was an integer
1:
push {lr}
bl double2int64_z
subs r0, #1
sbcs r1, r1, #0
pop {pc}
2:
movs r0, #0
movs r1, #0
bx lr
double_section conv_dtofix64
regular_func double2fix64
lsls r3, r1, #1
bcc double2fix64_z // input positive is ok for fix64_z
cmp r3, #0xffe00000
bcs double2fix64_z // input is infinite
lsrs r3, #21
beq 2f // input zero or denormal, means answer remains zero
sub r3, #0x3ff
adds r3, r2
blt 1f // modified input zero or denormal, or less than 1.0
cmp r3, #52
bge double2fix64_z // modified input must be an integer or infinite
adds r3, #12
lsls ip, r1, r3 // ip has remaining fractional mantissa bits of r1
bne 1f // not integer as non zero fractional bits remain
subs r3, #32
bics r3, r3, r3, asr #31 // map negative shift to zero
lsls r3, r0, r3
beq double2fix64_z // remaining fractional bits are 0, so argument was an integer
1:
push {lr}
bl double2fix64_z
subs r0, #1
sbcs r1, r1, #0
pop {pc}
2:
movs r0, #0
movs r1, #0
bx lr
double_wrapper_section conv_dtoi64_z
@ convert double to signed int64, rounding towards 0, clamping @ convert double to signed int64, rounding towards 0, clamping
wrapper_func __aeabi_d2lz wrapper_func __aeabi_d2lz

View File

@@ -582,7 +582,7 @@ wrapper_func fma
saving_func_return saving_func_return
double_wrapper_section __dmla double_section fma_fast
@ cf saving_func macro: but here we need to record the SP before the state save possibly changes it @ cf saving_func macro: but here we need to record the SP before the state save possibly changes it
1: 1:
push {lr} // 16-bit instruction push {lr} // 16-bit instruction
@@ -592,6 +592,7 @@ double_wrapper_section __dmla
@ r0:r1 m @ r0:r1 m
@ r2:r3 n @ r2:r3 n
@ [r13,#0] a @ [r13,#0] a
regular_func fma_fast
regular_func mla regular_func mla
mov r12,sp @ save the SP mov r12,sp @ save the SP
PCMP apsr_nzcv @ test the engaged flag PCMP apsr_nzcv @ test the engaged flag

View File

@@ -16,50 +16,153 @@ extern "C" {
#endif #endif
/** \file double.h /** \file double.h
* \defgroup pico_double pico_double * \defgroup pico_double pico_double
* *
* \brief Optimized double-precision floating point functions * \brief Optimized double-precision floating point functions
* *
* (Replacement) optimized implementations are provided of the following compiler built-ins * An application can take control of the floating point routines used in the application over and above what is provided by the compiler,
* and math library functions: * by depending on the pico_double library. A user might want to do this:
* *
* - __aeabi_dadd, __aeabi_ddiv, __aeabi_dmul, __aeabi_drsub, __aeabi_dsub, __aeabi_cdcmpeq, __aeabi_cdrcmple, __aeabi_cdcmple, __aeabi_dcmpeq, __aeabi_dcmplt, __aeabi_dcmple, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmpun, __aeabi_i2d, __aeabi_l2d, __aeabi_ui2d, __aeabi_ul2d, __aeabi_d2iz, __aeabi_d2lz, __aeabi_d2uiz, __aeabi_d2ulz, __aeabi_d2f * 1. To use optimized software implementations provided by the RP2-series device's bootrom or the SDK
* - sqrt, cos, sin, tan, atan2, exp, log, ldexp, copysign, trunc, floor, ceil, round, asin, acos, atan, sinh, cosh, tanh, asinh, acosh, atanh, exp2, log2, exp10, log10, pow,, hypot, cbrt, fmod, drem, remainder, remquo, expm1, log1p, fma * 2. To use optimized combined software/hardware implementations utilizing custom RP2-series hardware for acceleration
* - powint, sincos (GNU extensions) * 3. To control the amount of C compiler/library code bloat
* 4. To make sure no floating point is called at all
* *
* The following additional optimized functions are also provided: * The pico_double library comes in three main flavors:
* *
* - int2double, uint2double, int642double, uint642double, fix2double, ufix2double, fix642double, ufix642double * 1. `pico_double_none` - all floating point operations cause a \ref panic - no double-precision floating point code is included
* - double2fix, double2ufix, double2fix64, double2ufix64, double2int, double2uint, double2int64, double2uint64, double2int_z, double2int64_z, * 2. `pico_double_compiler` - no custom functions are provided; all double-precision floating point is handled by the C compiler/library
* - exp10, sincos, powint * 3. `pico_double_pico` - the smallest and fastest available for the platform, along with additional functionality (e.g. fixed point conversions) which are detailed below
* *
* On RP2350 the following additional functions are available; the _fast methods are faster but do not round correctly" * The user can control which version they want (e.g. **pico_double_xxx** by either setting the CMake global variable
* `PICO_DEFAULT_DOUBLE_IMPL=xxx`, or by using the CMake function `pico_set_double_implementation(<TARGET> xxx)`. Note that in the absence
* of either, pico_double_pico is used by default.
* *
* - ddiv_fast, sqrt_fast * \if rp2040_specific
* On RP2040, `pico_double_pico` uses optimized hand coded implementations from the bootrom and the SDK for both
* basic double-precision floating point operations and floating point math library functions. These implementations
* are generally faster and smaller than those provided by the C compiler/library, though they don't support all the features of a fully compliant
* floating point implementation; they are however usually fine for the majority of cases
* \endif
*
* \if rp2350_specific
* On RP2350, `pico_double_pico` uses RP2350 DCP instructions (double co-processor) to implement fast versions of the basic
* arithmetic functions, and provides optimized M33 implementations of trigonometric and scientific functions.
* These implementations are generally faster and smaller than those provided by the C compiler/library, though they don't support all the features of a fully compliant
* floating point implementation; they are however usually fine for the majority of cases
* \endif
*
* On Arm, (replacement) optimized implementations are provided for the following compiler built-ins
* and math library functions when using `pico_double_pico`:
*
* - basic arithmetic:
*
* __aeabi_dadd, __aeabi_ddiv, __aeabi_dmul, __aeabi_drsub, __aeabi_dsub
*
* - comparison:
*
* __aeabi_cdcmpeq, __aeabi_cdrcmple, __aeabi_cdcmple, __aeabi_dcmpeq, __aeabi_dcmplt, __aeabi_dcmple, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmpun
*
* - (u)int32 <-> double:
*
* __aeabi_i2d, __aeabi_ui2d, __aeabi_d2iz, __aeabi_d2uiz
*
* - (u)int64 <-> double:
*
* __aeabi_l2d, __aeabi_ul2d, __aeabi_d2lz, __aeabi_d2ulz
*
* - double -> float:
*
* __aeabi_d2f
*
* - basic trigonometric:
*
* sqrt, cos, sin, tan, atan2, exp, log
*
* - trigonometric and scientific
*
* ldexp, copysign, trunc, floor, ceil, round, asin, acos, atan, sinh, cosh, tanh, asinh, acosh, atanh, exp2, log2, exp10, log10, pow, hypot, cbrt, fmod, drem, remainder, remquo, expm1, log1p, fma
*
* - GNU extensions:
*
* powint, sincos
*
* On Arm, the following additional optimized functions are also provided when using `pico_double_pico`:
*
* - Conversions to/from integer types:
*
* - (u)int -> double (round to nearest):
*
* int2double, uint2double, int642double, uint642double
*
* - double -> (u)int (round towards zero):
*
* double2int_z, double2uint_z, double2int64_z, double2uint64_z
*
* - double -> (u)int (round towards -infinity):
*
* double2int, double2uint, double2int64, double2uint64
*
* - Conversions to/from fixed point integers:
*
* - (u)fix -> double (round to nearest):
*
* fix2double, ufix2double, fix642double, ufix642double
*
* - double -> (u)fix (round towards zero):
*
* double2fix_z, double2ufix_z, double2fix64_z, double2ufix64_z
*
* - double -> (u)fix (round towards -infinity):
*
* double2fix, double2ufix, double2fix64, double2ufix64
*
* - Even faster versions of divide and square-root functions that do not round correctly:
*
* ddiv_fast, sqrt_fast (these do not round correctly)
*
* - Faster unfused multiply and accumulate:
*
* mla (fast fma)
*
* \if rp2350_specific
* On RISC-V there is no custom double-precision floating point support, so `pico_double_pico` is equivalent to `pico_double_compiler`
* \endif
*/ */
#if !defined(__riscv) || PICO_COMBINED_DOCS
#if PICO_COMBINED_DOCS || !LIB_PICO_DOUBLE_COMPILER
double int2double(int32_t i); double int2double(int32_t i);
double uint2double(uint32_t u); double uint2double(uint32_t i);
double int642double(int64_t i); double int642double(int64_t i);
double uint642double(uint64_t u); double uint642double(uint64_t i);
double fix2double(int32_t m, int e); double fix2double(int32_t m, int e);
double ufix2double(uint32_t m, int e); double ufix2double(uint32_t m, int e);
double fix642double(int64_t m, int e); double fix642double(int64_t m, int e);
double ufix642double(uint64_t m, int e); double ufix642double(uint64_t m, int e);
// These methods round towards -Infinity. // These methods round towards 0, which IS the C way
int32_t double2fix(double d, int e); int32_t double2int_z(double f);
uint32_t double2ufix(double d, int e); int64_t double2int64_z(double f);
int64_t double2fix64(double d, int e); int32_t double2uint_z(double f);
uint64_t double2ufix64(double d, int e); int64_t double2uint64_z(double f);
int32_t double2int(double d); int32_t double2fix_z(double f, int e);
uint32_t double2uint(double d); uint32_t double2ufix_z(double f, int e);
int64_t double2int64(double d); int64_t double2fix64_z(double f, int e);
uint64_t double2uint64(double d); uint64_t double2ufix64_z(double f, int e);
// These methods round towards 0. // These methods round towards -Infinity - which IS NOT the C way for negative numbers;
int32_t double2int_z(double d); // as such the naming is not ideal, however is kept for backwards compatibility
int64_t double2int64_z(double d); int32_t double2int(double f);
uint32_t double2uint(double f);
int64_t double2int64(double f);
uint64_t double2uint64(double f);
int32_t double2fix(double f, int e);
uint32_t double2ufix(double f, int e);
int64_t double2fix64(double f, int e);
uint64_t double2ufix64(double f, int e);
#endif
double exp10(double x); double exp10(double x);
void sincos(double x, double *sinx, double *cosx); void sincos(double x, double *sinx, double *cosx);
@@ -67,8 +170,24 @@ double powint(double x, int y);
#if !PICO_RP2040 || PICO_COMBINED_DOCS #if !PICO_RP2040 || PICO_COMBINED_DOCS
double ddiv_fast(double n, double d); double ddiv_fast(double n, double d);
double sqrt_fast(double d); double sqrt_fast(double f);
double mla(double x, double y, double z); // note this is not fused double fma_fast(double x, double y, double z); // this is not fused
double mla(double x, double y, double z); // another name for fma_fast
#endif
#endif
#if LIB_PICO_DOUBLE_COMPILER || defined(__riscv)
// when using the compiler; we provide as many functions as we trivially can, though in the double case they are not optimal
// Compiler-backed fallbacks: each conversion is a plain C cast, so the result
// follows the C conversion rules (double -> integer casts discard the
// fractional part, i.e. round towards zero).
static inline double int2double(int32_t i) { return (double)i; }
static inline double uint2double(uint32_t i) { return (double)i; }
static inline double int642double(int64_t i) { return (double)i; }
static inline double uint642double(uint64_t i) { return (double)i; }
static inline int32_t double2int_z(double d) { return (int32_t)d; }
static inline int64_t double2int64_z(double d) { return (int64_t)d; }
// The unsigned conversions return unsigned types: returning the signed type
// would turn results above INT32_MAX / INT64_MAX into negative values.
static inline uint32_t double2uint_z(double d) { return (uint32_t)d; }
static inline uint64_t double2uint64_z(double d) { return (uint64_t)d; }
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
@@ -76,4 +195,3 @@ double mla(double x, double y, double z); // note this is not fused
#endif #endif
#endif #endif

View File

@@ -2,13 +2,16 @@ load("//bazel:defs.bzl", "compatible_with_rp2", "incompatible_with_config")
package(default_visibility = ["//visibility:public"]) package(default_visibility = ["//visibility:public"])
_WRAP_FLOAT_AEABI_FLAGS = [ _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS = [
"-Wl,--wrap=__aeabi_fadd", "-Wl,--wrap=__aeabi_fadd",
"-Wl,--wrap=__aeabi_fdiv", "-Wl,--wrap=__aeabi_fdiv",
"-Wl,--wrap=__aeabi_fmul", "-Wl,--wrap=__aeabi_fmul",
"-Wl,--wrap=__aeabi_frsub", "-Wl,--wrap=__aeabi_frsub",
"-Wl,--wrap=__aeabi_fsub", "-Wl,--wrap=__aeabi_fsub",
"-Wl,--wrap=__aeabi_cfcmpeq", "-Wl,--wrap=__aeabi_cfcmpeq",
]
_WRAP_FLOAT_AEABI_CMP_FLAGS = [
"-Wl,--wrap=__aeabi_cfrcmple", "-Wl,--wrap=__aeabi_cfrcmple",
"-Wl,--wrap=__aeabi_cfcmple", "-Wl,--wrap=__aeabi_cfcmple",
"-Wl,--wrap=__aeabi_fcmpeq", "-Wl,--wrap=__aeabi_fcmpeq",
@@ -17,15 +20,27 @@ _WRAP_FLOAT_AEABI_FLAGS = [
"-Wl,--wrap=__aeabi_fcmpge", "-Wl,--wrap=__aeabi_fcmpge",
"-Wl,--wrap=__aeabi_fcmpgt", "-Wl,--wrap=__aeabi_fcmpgt",
"-Wl,--wrap=__aeabi_fcmpun", "-Wl,--wrap=__aeabi_fcmpun",
]
_WRAP_FLOAT_AEABI_CONV_32_FLAGS = [
"-Wl,--wrap=__aeabi_i2f", "-Wl,--wrap=__aeabi_i2f",
"-Wl,--wrap=__aeabi_l2f", "-Wl,--wrap=__aeabi_l2f",
"-Wl,--wrap=__aeabi_ui2f", "-Wl,--wrap=__aeabi_ui2f",
"-Wl,--wrap=__aeabi_ul2f", "-Wl,--wrap=__aeabi_ul2f",
]
_WRAP_FLOAT_AEABI_CONV_64_FLAGS = [
"-Wl,--wrap=__aeabi_f2iz", "-Wl,--wrap=__aeabi_f2iz",
"-Wl,--wrap=__aeabi_f2lz", "-Wl,--wrap=__aeabi_f2lz",
"-Wl,--wrap=__aeabi_f2uiz", "-Wl,--wrap=__aeabi_f2uiz",
"-Wl,--wrap=__aeabi_f2ulz", "-Wl,--wrap=__aeabi_f2ulz",
]
_WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS = [
"-Wl,--wrap=__aeabi_f2d", "-Wl,--wrap=__aeabi_f2d",
]
_WRAP_FLOAT_SQRTF_FLAGS = [
"-Wl,--wrap=sqrtf", "-Wl,--wrap=sqrtf",
] ]
@@ -36,13 +51,16 @@ _WRAP_FLOAT_SCI_FLAGS = [
"-Wl,--wrap=atan2f", "-Wl,--wrap=atan2f",
"-Wl,--wrap=expf", "-Wl,--wrap=expf",
"-Wl,--wrap=logf", "-Wl,--wrap=logf",
"-Wl,--wrap=sincosf", # gnu
]
_WRAP_FLOAT_SCI_EXTRA_FLAGS = [
"-Wl,--wrap=ldexpf", "-Wl,--wrap=ldexpf",
"-Wl,--wrap=copysignf", "-Wl,--wrap=copysignf",
"-Wl,--wrap=truncf", "-Wl,--wrap=truncf",
"-Wl,--wrap=floorf", "-Wl,--wrap=floorf",
"-Wl,--wrap=ceilf", "-Wl,--wrap=ceilf",
"-Wl,--wrap=roundf", "-Wl,--wrap=roundf",
"-Wl,--wrap=sincosf", # gnu
"-Wl,--wrap=asinf", "-Wl,--wrap=asinf",
"-Wl,--wrap=acosf", "-Wl,--wrap=acosf",
"-Wl,--wrap=atanf", "-Wl,--wrap=atanf",
@@ -114,30 +132,31 @@ _PICO_FLOAT_IMPLS = [
], ],
"compatibility": incompatible_with_config("@platforms//cpu:riscv32") + ["//bazel/constraint:rp2040"], "compatibility": incompatible_with_config("@platforms//cpu:riscv32") + ["//bazel/constraint:rp2040"],
"extra_deps": [], "extra_deps": [],
"linkopts": _WRAP_FLOAT_AEABI_FLAGS + _WRAP_FLOAT_SCI_FLAGS, "linkopts": _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS + _WRAP_FLOAT_AEABI_CMP_FLAGS + _WRAP_FLOAT_AEABI_CONV_32_FLAGS + _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS + _WRAP_FLOAT_SQRTF_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS,
}, },
{ {
"name": "dcp", "name": "dcp",
"srcs": [ "srcs": [
"float_aeabi_dcp.S", "float_aeabi_dcp.S",
"float_conv_m33.S", "float_common_m33.S",
"float_math.c", "float_math.c",
"float_sci_m33.S", "float_sci_m33.S",
], ],
"compatibility": compatible_with_rp2() + incompatible_with_config("@platforms//cpu:riscv32") + incompatible_with_config("//bazel/constraint:rp2040"), "compatibility": compatible_with_rp2() + incompatible_with_config("@platforms//cpu:riscv32") + incompatible_with_config("//bazel/constraint:rp2040"),
"extra_deps": ["//src/rp2_common/hardware_dcp"], "extra_deps": ["//src/rp2_common/hardware_dcp"],
"linkopts": _WRAP_FLOAT_SCI_FLAGS, "linkopts": _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS + _WRAP_FLOAT_AEABI_CMP_FLAGS + _WRAP_FLOAT_AEABI_CONV_32_FLAGS + _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS + _WRAP_FLOAT_SQRTF_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS,
}, },
{ {
"name": "vfp", "name": "vfp",
"srcs": [ "srcs": [
"float_conv32_vfp.S",
"float_sci_m33_vfp.S", "float_sci_m33_vfp.S",
"float_conv_m33.S", "float_common_m33.S",
"float_math.c", "float_math.c",
], ],
"compatibility": compatible_with_rp2() + incompatible_with_config("@platforms//cpu:riscv32") + incompatible_with_config("//bazel/constraint:rp2040"), "compatibility": compatible_with_rp2() + incompatible_with_config("@platforms//cpu:riscv32") + incompatible_with_config("//bazel/constraint:rp2040"),
"extra_deps": ["//src/rp2_common/hardware_dcp"], "extra_deps": ["//src/rp2_common/hardware_dcp"],
"linkopts": _WRAP_FLOAT_SCI_FLAGS, "linkopts": _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS,
}, },
{ {
"name": "single_hazard3", "name": "single_hazard3",
@@ -146,7 +165,7 @@ _PICO_FLOAT_IMPLS = [
], ],
"compatibility": compatible_with_rp2() + ["@platforms//cpu:riscv32"], "compatibility": compatible_with_rp2() + ["@platforms//cpu:riscv32"],
"extra_deps": ["//src/rp2_common/hardware_hazard3"], "extra_deps": ["//src/rp2_common/hardware_hazard3"],
"linkopts": _WRAP_FLOAT_SCI_FLAGS, "linkopts": _WRAP_FLOAT_SCI_EXTRA_FLAGS,
}, },
] ]
@@ -184,7 +203,7 @@ cc_library(
hdrs = ["include/pico/float.h"], hdrs = ["include/pico/float.h"],
defines = ["LIB_PICO_FLOAT_PICO=0"], defines = ["LIB_PICO_FLOAT_PICO=0"],
includes = ["include"], includes = ["include"],
linkopts = _WRAP_FLOAT_AEABI_FLAGS + _WRAP_FLOAT_SCI_FLAGS, linkopts = _WRAP_FLOAT_AEABI_ARITHMETIC_FLAGS + _WRAP_FLOAT_AEABI_CMP_FLAGS + _WRAP_FLOAT_AEABI_CONV_32_FLAGS + _WRAP_FLOAT_AEABI_CONV_64_FLAGS + _WRAP_FLOAT_AEABI_CONV_DOUBLE_FLAGS + _WRAP_FLOAT_SQRTF_FLAGS + _WRAP_FLOAT_SCI_FLAGS + _WRAP_FLOAT_SCI_EXTRA_FLAGS,
target_compatible_with = compatible_with_rp2(), target_compatible_with = compatible_with_rp2(),
visibility = ["//visibility:private"], visibility = ["//visibility:private"],
deps = [ deps = [

View File

@@ -18,13 +18,15 @@
$<IF:$<BOOL:$<TARGET_PROPERTY:PICO_TARGET_FLOAT_IMPL>>,$<TARGET_PROPERTY:PICO_TARGET_FLOAT_IMPL>,${PICO_DEFAULT_FLOAT_IMPL}>) $<IF:$<BOOL:$<TARGET_PROPERTY:PICO_TARGET_FLOAT_IMPL>>,$<TARGET_PROPERTY:PICO_TARGET_FLOAT_IMPL>,${PICO_DEFAULT_FLOAT_IMPL}>)
function(wrap_float_functions TARGET) function(wrap_float_functions TARGET)
cmake_parse_arguments(WRAP_FLOAT "NO_WRAP_AEABI;NO_WRAP_SCI" "" "" ${ARGN} ) cmake_parse_arguments(WRAP_FLOAT "NO_AEABI_ARITHMETIC;NO_AEABI_CMP;NO_AEABI_CONV_32;NO_AEABI_CONV_64;NO_AEABI_CONV_DOUBLE;NO_SQRTF;NO_SCI;NO_SCI_EXTRA" "" "" ${ARGN} )
if (NOT WRAP_FLOAT_NO_WRAP_AEABI) if (NOT WRAP_FLOAT_NO_AEABI_ARITHMETIC)
pico_wrap_function(${TARGET} __aeabi_fadd) pico_wrap_function(${TARGET} __aeabi_fadd)
pico_wrap_function(${TARGET} __aeabi_fdiv) pico_wrap_function(${TARGET} __aeabi_fdiv)
pico_wrap_function(${TARGET} __aeabi_fmul) pico_wrap_function(${TARGET} __aeabi_fmul)
pico_wrap_function(${TARGET} __aeabi_frsub) pico_wrap_function(${TARGET} __aeabi_frsub)
pico_wrap_function(${TARGET} __aeabi_fsub) pico_wrap_function(${TARGET} __aeabi_fsub)
endif()
if (NOT WRAP_FLOAT_NO_AEABI_CMP)
pico_wrap_function(${TARGET} __aeabi_cfcmpeq) pico_wrap_function(${TARGET} __aeabi_cfcmpeq)
pico_wrap_function(${TARGET} __aeabi_cfrcmple) pico_wrap_function(${TARGET} __aeabi_cfrcmple)
pico_wrap_function(${TARGET} __aeabi_cfcmple) pico_wrap_function(${TARGET} __aeabi_cfcmple)
@@ -34,32 +36,42 @@
pico_wrap_function(${TARGET} __aeabi_fcmpge) pico_wrap_function(${TARGET} __aeabi_fcmpge)
pico_wrap_function(${TARGET} __aeabi_fcmpgt) pico_wrap_function(${TARGET} __aeabi_fcmpgt)
pico_wrap_function(${TARGET} __aeabi_fcmpun) pico_wrap_function(${TARGET} __aeabi_fcmpun)
endif()
if (NOT WRAP_FLOAT_NO_AEABI_CONV_32)
pico_wrap_function(${TARGET} __aeabi_i2f) pico_wrap_function(${TARGET} __aeabi_i2f)
pico_wrap_function(${TARGET} __aeabi_l2f)
pico_wrap_function(${TARGET} __aeabi_ui2f) pico_wrap_function(${TARGET} __aeabi_ui2f)
pico_wrap_function(${TARGET} __aeabi_ul2f)
pico_wrap_function(${TARGET} __aeabi_f2iz) pico_wrap_function(${TARGET} __aeabi_f2iz)
pico_wrap_function(${TARGET} __aeabi_f2lz)
pico_wrap_function(${TARGET} __aeabi_f2uiz) pico_wrap_function(${TARGET} __aeabi_f2uiz)
endif()
if (NOT WRAP_FLOAT_NO_AEABI_CONV_64)
pico_wrap_function(${TARGET} __aeabi_l2f)
pico_wrap_function(${TARGET} __aeabi_ul2f)
pico_wrap_function(${TARGET} __aeabi_f2lz)
pico_wrap_function(${TARGET} __aeabi_f2ulz) pico_wrap_function(${TARGET} __aeabi_f2ulz)
endif()
if (NOT WRAP_FLOAT_NO_AEABI_CONV_DOUBLE)
pico_wrap_function(${TARGET} __aeabi_f2d) pico_wrap_function(${TARGET} __aeabi_f2d)
endif()
# separate as we have a direct DCP version
if (NOT WRAP_FLOAT_NO_SQRTF)
pico_wrap_function(${TARGET} sqrtf) pico_wrap_function(${TARGET} sqrtf)
endif() endif()
if (NOT WRAP_FLOAT_NO_WRAP_SCI) if (NOT WRAP_FLOAT_NO_SCI)
pico_wrap_function(${TARGET} cosf) pico_wrap_function(${TARGET} cosf)
pico_wrap_function(${TARGET} sinf) pico_wrap_function(${TARGET} sinf)
pico_wrap_function(${TARGET} tanf) pico_wrap_function(${TARGET} tanf)
pico_wrap_function(${TARGET} atan2f) pico_wrap_function(${TARGET} atan2f)
pico_wrap_function(${TARGET} expf) pico_wrap_function(${TARGET} expf)
pico_wrap_function(${TARGET} logf) pico_wrap_function(${TARGET} logf)
pico_wrap_function(${TARGET} sincosf) # gnu
endif()
if (NOT WRAP_FLOAT_NO_SCI_EXTRA)
pico_wrap_function(${TARGET} ldexpf) pico_wrap_function(${TARGET} ldexpf)
pico_wrap_function(${TARGET} copysignf) pico_wrap_function(${TARGET} copysignf)
pico_wrap_function(${TARGET} truncf) pico_wrap_function(${TARGET} truncf)
pico_wrap_function(${TARGET} floorf) pico_wrap_function(${TARGET} floorf)
pico_wrap_function(${TARGET} ceilf) pico_wrap_function(${TARGET} ceilf)
pico_wrap_function(${TARGET} roundf) pico_wrap_function(${TARGET} roundf)
pico_wrap_function(${TARGET} sincosf) # gnu
pico_wrap_function(${TARGET} asinf) pico_wrap_function(${TARGET} asinf)
pico_wrap_function(${TARGET} acosf) pico_wrap_function(${TARGET} acosf)
pico_wrap_function(${TARGET} atanf) pico_wrap_function(${TARGET} atanf)
@@ -93,7 +105,9 @@
) )
target_link_libraries(pico_float_none INTERFACE pico_float_headers) target_link_libraries(pico_float_none INTERFACE pico_float_headers)
wrap_float_functions(pico_float_none) wrap_float_functions(pico_float_none) # we wrap all functions
# be explicit that there should be no floating point instructions
target_compile_options(pico_float_none INTERFACE -msoft-float)
pico_add_library(pico_float_pico) pico_add_library(pico_float_pico)
if (PICO_RP2040) if (PICO_RP2040)
@@ -107,21 +121,52 @@
target_link_libraries(pico_float_pico INTERFACE pico_bootrom pico_float_headers hardware_divider) target_link_libraries(pico_float_pico INTERFACE pico_bootrom pico_float_headers hardware_divider)
elseif(NOT PICO_RISCV) elseif(NOT PICO_RISCV)
pico_add_library(pico_float_pico_dcp) pico_add_library(pico_float_pico_dcp)
# todo what functions from float_math belong in each case; should some be left to GCC on RP2350?
target_sources(pico_float_pico_dcp INTERFACE target_sources(pico_float_pico_dcp INTERFACE
${CMAKE_CURRENT_LIST_DIR}/float_math.c ${CMAKE_CURRENT_LIST_DIR}/float_math.c
${CMAKE_CURRENT_LIST_DIR}/float_aeabi_dcp.S ${CMAKE_CURRENT_LIST_DIR}/float_aeabi_dcp.S
${CMAKE_CURRENT_LIST_DIR}/float_common_m33.S
${CMAKE_CURRENT_LIST_DIR}/float_sci_m33.S ${CMAKE_CURRENT_LIST_DIR}/float_sci_m33.S
${CMAKE_CURRENT_LIST_DIR}/float_conv_m33.S
) )
wrap_float_functions(pico_float_pico_dcp NO_WRAP_AEABI) # NOTE the main reason for using pico_float_pico_dcp is presumably that you
# don't want to use VFP at all, so turn off compiler support, otherwise, it will inline usages
target_compile_options(pico_float_pico_dcp INTERFACE -msoft-float)
wrap_float_functions(pico_float_pico_dcp
# we wrap all functions as we don't want to use VFP (or compiler versions) at all
#NO_AEABI_ARITHMETIC
#NO_AEABI_CMP
#NO_AEABI_CONV_32
#NO_AEABI_CONV_64
#NO_AEABI_CONV_DOUBLE
#NO_SQRTF
#NO_SCI
#NO_SCI_EXTRA
)
pico_add_library(pico_float_pico_vfp) pico_add_library(pico_float_pico_vfp)
target_sources(pico_float_pico_vfp INTERFACE target_sources(pico_float_pico_vfp INTERFACE
${CMAKE_CURRENT_LIST_DIR}/float_math.c ${CMAKE_CURRENT_LIST_DIR}/float_math.c
${CMAKE_CURRENT_LIST_DIR}/float_conv32_vfp.S
${CMAKE_CURRENT_LIST_DIR}/float_common_m33.S
${CMAKE_CURRENT_LIST_DIR}/float_sci_m33_vfp.S ${CMAKE_CURRENT_LIST_DIR}/float_sci_m33_vfp.S
${CMAKE_CURRENT_LIST_DIR}/float_conv_m33.S
) )
wrap_float_functions(pico_float_pico_vfp NO_WRAP_AEABI) wrap_float_functions(pico_float_pico_vfp
# for these 3, arguably compiler is probably inlining anyway, but use the compiler's
# version for explicit AEABI calls
NO_AEABI_ARITHMETIC
NO_AEABI_CMP
NO_AEABI_CONV_32
#NO_AEABI_CONV_64 # we have optimized M33 versions
NO_AEABI_CONV_DOUBLE
# we don't have an optimized vfp or m33 sqrtf available
NO_SQRTF
#NO_SCI # we have optimized VFP versions
#NO_SCI_EXTRA # todo - are our versions better than what GCC provides?
)
target_link_libraries(pico_float_pico INTERFACE target_link_libraries(pico_float_pico INTERFACE
pico_float_pico_vfp) pico_float_pico_vfp)
else() else()

View File

@@ -5,15 +5,17 @@
*/ */
#include "pico/asm_helper.S" #include "pico/asm_helper.S"
#if HAS_DOUBLE_COPROCESSOR
#if !HAS_DOUBLE_COPROCESSOR
#error attempt to compile float_aeabi_dcp when there is no DCP
#else
#include "hardware/dcp_instr.inc.S" #include "hardware/dcp_instr.inc.S"
#include "hardware/dcp_canned.inc.S" #include "hardware/dcp_canned.inc.S"
pico_default_asm_setup pico_default_asm_setup
// todo alignment // todo factor out save/restore (there is a copy in double code)
//__pre_init __aeabi_float_init, 00020
// factor out save/restore (there is a copy in double code)
.macro float_section name .macro float_section name
#if PICO_FLOAT_IN_RAM #if PICO_FLOAT_IN_RAM
@@ -29,7 +31,7 @@ float_section WRAPPER_FUNC_NAME(\func)
// ============== STATE SAVE AND RESTORE =============== // ============== STATE SAVE AND RESTORE ===============
.macro saving_func func .macro saving_func type func, opt_label1='-', opt_label2='-'
// Note we are usually 32-bit aligned already at this point, as most of the // Note we are usually 32-bit aligned already at this point, as most of the
// function bodies contain exactly two 16-bit instructions: bmi and bx lr. // function bodies contain exactly two 16-bit instructions: bmi and bx lr.
// We want the PCMP word-aligned. // We want the PCMP word-aligned.
@@ -41,8 +43,14 @@ float_section WRAPPER_FUNC_NAME(\func)
push {lr} // 16-bit instruction push {lr} // 16-bit instruction
bl generic_save_state // 32-bit instruction bl generic_save_state // 32-bit instruction
b 1f // 16-bit instruction b 1f // 16-bit instruction
.ifnc \opt_label1,'-'
regular_func \opt_label1
.endif
.ifnc \opt_label2,'-'
regular_func \opt_label2
.endif
// This is the actual entry point: // This is the actual entry point:
wrapper_func \func \type\()_func \func
PCMP apsr_nzcv PCMP apsr_nzcv
bmi 1b bmi 1b
1: 1:
@@ -82,115 +90,208 @@ generic_restore_state:
// ============== ARITHMETIC FUNCTIONS =============== // ============== ARITHMETIC FUNCTIONS ===============
float_wrapper_section __aeabi_fadd float_wrapper_section __aeabi_fadd
saving_func __aeabi_fadd saving_func wrapper __aeabi_fadd
dcp_fadd_m r0,r0,r1 dcp_fadd_m r0,r0,r1
saving_func_return saving_func_return
float_wrapper_section __aeabi_fsub float_wrapper_section __aeabi_fsub
saving_func __aeabi_fsub saving_func wrapper __aeabi_fsub
dcp_fsub_m r0,r0,r1 dcp_fsub_m r0,r0,r1
saving_func_return saving_func_return
float_wrapper_section __aeabi_frsub float_wrapper_section __aeabi_frsub
saving_func __aeabi_frsub saving_func wrapper __aeabi_frsub
dcp_fsub_m r0,r1,r0 dcp_fsub_m r0,r1,r0
saving_func_return saving_func_return
float_wrapper_section __aeabi_fmul float_wrapper_section __aeabi_fmul
saving_func __aeabi_fmul saving_func wrapper __aeabi_fmul
dcp_fmul_m r0,r0,r1,r0,r1 dcp_fmul_m r0,r0,r1,r0,r1
saving_func_return saving_func_return
float_section fdiv_fast float_section fdiv_fast
saving_func fdiv_fast saving_func regular fdiv_fast
dcp_fdiv_fast_m r0,r0,r1,r0,r1,r2 dcp_fdiv_fast_m r0,r0,r1,r0,r1,r2
saving_func_return saving_func_return
float_wrapper_section __aeabi_fdiv float_wrapper_section __aeabi_fdiv
saving_func __aeabi_fdiv saving_func wrapper __aeabi_fdiv
@ with correct rounding @ with correct rounding
dcp_fdiv_m r0,r0,r1,r0,r1,r2,r3 dcp_fdiv_m r0,r0,r1,r0,r1,r2,r3
saving_func_return saving_func_return
float_section sqrtf_fast float_section sqrtf_fast
saving_func sqrtf_fast saving_func regular sqrtf_fast
dcp_fsqrt_fast_m r0,r0,r0,r1,r2,r3 dcp_fsqrt_fast_m r0,r0,r0,r1,r2,r3
saving_func_return saving_func_return
float_wrapper_section sqrtf float_wrapper_section sqrtf
saving_func sqrtf saving_func wrapper sqrtf
@ with correct rounding @ with correct rounding
dcp_fsqrt_m r0,r0,r0,r1,r2,r3 dcp_fsqrt_m r0,r0,r0,r1,r2,r3
saving_func_return saving_func_return
// todo not a real thing float_section fclassify
float_wrapper_section __aeabi_fclassify saving_func regular fclassify
saving_func __aeabi_fclassify
dcp_fclassify_m apsr_nzcv,r0 dcp_fclassify_m apsr_nzcv,r0
saving_func_return saving_func_return
// ============== CONVERSION FUNCTIONS =============== // ============== CONVERSION FUNCTIONS ===============
float_wrapper_section __aeabi_f2d float_wrapper_section __aeabi_f2d
saving_func __aeabi_f2d saving_func wrapper __aeabi_f2d float2double
dcp_float2double_m r0,r1,r0 dcp_float2double_m r0,r1,r0
saving_func_return saving_func_return
float_wrapper_section __aeabi_i2f float_wrapper_section __aeabi_i2f
saving_func __aeabi_i2f saving_func wrapper __aeabi_i2f int2float
@ with rounding @ with rounding
dcp_int2float_m r0,r0 dcp_int2float_m r0,r0
saving_func_return saving_func_return
float_wrapper_section __aeabi_ui2f float_wrapper_section __aeabi_ui2f
saving_func __aeabi_ui2f saving_func wrapper __aeabi_ui2f uint2float
@ with rounding @ with rounding
dcp_uint2float_m r0,r0 dcp_uint2float_m r0,r0
saving_func_return saving_func_return
// int32_t float2fix_z(float f, int e)
// In:  r0 = f (single-precision bit pattern), r1 = e (number of fractional bits)
// Out: r0 = f * 2^e as a signed 32-bit integer, rounded towards zero
// The scaling is done by adding e to the biased exponent field of f, then
// tail-branching into the truncating float -> int32 conversion.
// NOTE(review): float2int_z_entry sits after the PCMP/bmi entry sequence that
// saving_func emits for __aeabi_f2iz, so this path does not run that check -
// confirm this is intended.
float_section float2fix_z
regular_func float2fix_z
    ubfx r2, r0, #23, #8        // r2 = biased exponent of f
    cbz r2, 2f                  // input is zero or denormal
    cmp r2, #0xff
    beq 3f                      // input infinite or nan
    adds r2, r1                 // r2 = biased exponent of f * 2^e
    ble 2f                      // modified input is denormal so zero
    cmp r2, #0xff
    // must be >=, not ==: the bfi below keeps only the low 8 bits of r2, so a
    // sum above 0xff would otherwise wrap round to a small exponent
    bhs 3f                      // modified input is infinite or beyond
1:
    bfi r0, r2, #23, #8         // write the scaled exponent back into f
    b float2int_z_entry
2:
    movs r0, #0
    bx lr
3:
    mvn r1, #0x80000000         // r1 = 0x7fffffff
    add r0, r1, r0, lsr#31      @ so -Inf → 0x80000000, +Inf → 0x7fffffff
    bx lr
float_wrapper_section __aeabi_f2iz float_wrapper_section __aeabi_f2iz
saving_func __aeabi_f2iz saving_func wrapper __aeabi_f2iz float2int_z
@ with truncation towards 0 @ with truncation towards 0
float2int_z_entry:
dcp_float2int_m r0,r0 dcp_float2int_m r0,r0
saving_func_return saving_func_return
// uint32_t float2ufix(float f, int e) / uint32_t float2ufix_z(float f, int e)
// In:  r0 = f (single-precision bit pattern), r1 = e (number of fractional bits)
// Out: r0 = f * 2^e as an unsigned 32-bit integer
// Both names share one entry point. The scaling is done by adding e to the
// biased exponent field of f, then tail-branching into the truncating
// float -> uint32 conversion.
// NOTE(review): float2uint_z_entry sits after the PCMP/bmi entry sequence that
// saving_func emits for __aeabi_f2uiz, so this path does not run that check -
// confirm this is intended.
float_section __aeabi_f2ufix
regular_func float2ufix
regular_func float2ufix_z
    ubfx r2, r0, #23, #8        // r2 = biased exponent of f
    cbz r2, 2f                  // input is zero or denormal
    cmp r2, #0xff
    beq 3f                      // input infinite or nan
    adds r2, r1                 // r2 = biased exponent of f * 2^e
    ble 2f                      // modified input is denormal so zero
    cmp r2, #0xff
    // must be >=, not ==: the bfi below keeps only the low 8 bits of r2, so a
    // sum above 0xff would otherwise wrap round to a small exponent
    bhs 3f                      // modified input is infinite or beyond
1:
    bfi r0, r2, #23, #8         // write the scaled exponent back into f
    b float2uint_z_entry
2:
    movs r0, #0
    bx lr
3:
    mvn r0, r0, asr #31         // negative → 0x00000000, non-negative → 0xffffffff
    bx lr
float_wrapper_section __aeabi_f2uiz float_wrapper_section __aeabi_f2uiz
saving_func __aeabi_f2uiz saving_func wrapper __aeabi_f2uiz float2uint_z float2uint
@ with truncation towards 0 @ with truncation towards 0
float2uint_z_entry:
dcp_float2uint_m r0,r0 dcp_float2uint_m r0,r0
saving_func_return saving_func_return
// todo not a real thing float_section conv_f2fix
// int32_t float2fix(float f, int e)
// In:  r0 = f (single-precision bit pattern), r1 = e (number of fractional bits)
// Out: r0 = f * 2^e as a signed 32-bit integer, rounded towards -Infinity
// The scaling is done by adding e to the biased exponent field of f, then
// branching into the body of float2int.
saving_func regular float2fix
    ubfx r2, r0, #23, #8        // r2 = biased exponent of f
    cbz r2, 2f                  // input is zero or denormal
    cmp r2, #0xff
    beq 3f                      // input infinite or nan
    adds r2, r1                 // r2 = biased exponent of f * 2^e
    ble 2f                      // modified input is denormal so zero
    cmp r2, #0xff
    // must be >=, not ==: the bfi below keeps only the low 8 bits of r2, so a
    // sum above 0xff would otherwise wrap round to a small exponent
    bhs 3f                      // modified input is infinite or beyond
1:
    bfi r0, r2, #23, #8         // write the scaled exponent back into f
    b float2int_entry
2:
    movs r0, #0
    bx lr
3:
    mvn r1, #0x80000000         // r1 = 0x7fffffff
    add r0, r1, r0, lsr#31      @ so -Inf → 0x80000000, +Inf → 0x7fffffff
    bx lr
float_section float2int
// int32_t float2int(float f): float to int32, rounding towards -Infinity.
// In: r0 = f; out: r0 = result.
// Inputs for which truncation already gives the right answer (non-negative,
// -0, denormal, or integral) are handed to float2int_z_entry. Only negative
// non-integers fall through to the "convert, then subtract 1" path at 1:.
saving_func regular float2int
float2int_entry:
lsls r1, r0, #1
// the sign bit is now in C and r1 holds exponent:mantissa, so
// r0 = abs(zero) => r1 = 0x00000000
// r0 = abs(denormal) => r1 = 0x00xxxxxx
// r0 = abs(1.0f) => r1 = 0x7f000000
// r0 = abs(inf/nan) => r1 = 0xffxxxxxx
bls float2int_z_entry // input positive or zero or -zero are ok for int_z
lsrs r1, #24
beq float2int_z_entry // input denormal is flushed to zero anyway
subs r1, #0x7f
bcc 1f // input < 1.0f means we need to subtract 1 after conversion
// mask off all but fractional bits
lsls r2, r0, r1
lsls r2, #9
beq float2int_z_entry // input is integer
1:
// raw DCP instruction sequence; presumably the same truncating float -> int
// conversion that dcp_float2int_m performs - TODO confirm against dcp_canned.inc.S
WXFC r0, r0
ADD0
ADD1
NTDC
RDIC r0
subs r0, #1 // step a negative non-integer down by one to round towards -Infinity
saving_func_return
#if 0 // not sure these are super useful; if they are we should give them names
float_wrapper_section __aeabi_f2i_r float_wrapper_section __aeabi_f2i_r
saving_func __aeabi_f2i_r // (not a real thing - kept because we use wrapper in saving_func)
saving_func wrapper __aeabi_f2i_r
@ with rounding @ with rounding
dcp_float2int_r_m r0,r0 dcp_float2int_r_m r0,r0
saving_func_return saving_func_return
// todo not a real thing
float_wrapper_section __aeabi_f2ui_r float_wrapper_section __aeabi_f2ui_r
saving_func __aeabi_f2ui_r // (not a real thing - kept because we use wrapper in saving_func)
saving_func wrapper __aeabi_f2ui_r
@ with rounding @ with rounding
dcp_float2uint_r_m r0,r0 dcp_float2uint_r_m r0,r0
saving_func_return saving_func_return
#endif
// ============== COMPARISON FUNCTIONS =============== // ============== COMPARISON FUNCTIONS ===============
float_wrapper_section __aeabi_fcmpun float_wrapper_section __aeabi_fcmpun
saving_func __aeabi_fcmpun saving_func wrapper __aeabi_fcmpun
dcp_fcmp_m r0,r0,r1 dcp_fcmp_m r0,r0,r1
// extract unordered bit // extract unordered bit
ubfx r0, r0, #28, #1 ubfx r0, r0, #28, #1
saving_func_return saving_func_return
float_wrapper_section __aeabi_fcmp float_wrapper_section __aeabi_fcmp
saving_func __aeabi_cfrcmple saving_func wrapper __aeabi_cfrcmple
dcp_fcmp_m apsr_nzcv,r1,r0 // with arguments reversed dcp_fcmp_m apsr_nzcv,r1,r0 // with arguments reversed
bvs cmp_nan bvs cmp_nan
saving_func_return saving_func_return
// these next two can be the same function in the absence of exceptions // these next two can be the same function in the absence of exceptions
saving_func __aeabi_cfcmple saving_func wrapper __aeabi_cfcmple
dcp_fcmp_m apsr_nzcv,r0,r1 dcp_fcmp_m apsr_nzcv,r0,r1
bvs cmp_nan bvs cmp_nan
saving_func_return saving_func_return
@@ -198,7 +299,7 @@ saving_func __aeabi_cfcmple
// It is not clear from the ABI documentation whether cfcmpeq must set the C flag // It is not clear from the ABI documentation whether cfcmpeq must set the C flag
// in the same way as cfcmple. If not, we could save the "bvs" below; but we // in the same way as cfcmple. If not, we could save the "bvs" below; but we
// err on the side of caution. // err on the side of caution.
saving_func __aeabi_cfcmpeq saving_func wrapper __aeabi_cfcmpeq
dcp_fcmp_m apsr_nzcv,r0,r1 dcp_fcmp_m apsr_nzcv,r0,r1
bvs cmp_nan bvs cmp_nan
saving_func_return saving_func_return
@@ -212,14 +313,14 @@ cmp_nan:
saving_func_return saving_func_return
float_wrapper_section __aeabi_fcmpeq float_wrapper_section __aeabi_fcmpeq
saving_func __aeabi_fcmpeq saving_func wrapper __aeabi_fcmpeq
dcp_fcmp_m r0,r0,r1 dcp_fcmp_m r0,r0,r1
// extract Z // extract Z
ubfx r0, r0, #30, #1 ubfx r0, r0, #30, #1
saving_func_return saving_func_return
float_wrapper_section __aeabi_fcmplt float_wrapper_section __aeabi_fcmplt
saving_func __aeabi_fcmplt saving_func wrapper __aeabi_fcmplt
dcp_fcmp_m apsr_nzcv,r1,r0 dcp_fcmp_m apsr_nzcv,r1,r0
ite hi ite hi
movhi r0,#1 movhi r0,#1
@@ -227,7 +328,7 @@ saving_func __aeabi_fcmplt
saving_func_return saving_func_return
float_wrapper_section __aeabi_fcmple float_wrapper_section __aeabi_fcmple
saving_func __aeabi_fcmple saving_func wrapper __aeabi_fcmple
dcp_fcmp_m apsr_nzcv,r1,r0 dcp_fcmp_m apsr_nzcv,r1,r0
ite hs ite hs
movhs r0,#1 movhs r0,#1
@@ -235,7 +336,7 @@ saving_func __aeabi_fcmple
saving_func_return saving_func_return
float_wrapper_section __aeabi_fcmpge float_wrapper_section __aeabi_fcmpge
saving_func __aeabi_fcmpge saving_func wrapper __aeabi_fcmpge
dcp_fcmp_m apsr_nzcv,r0,r1 dcp_fcmp_m apsr_nzcv,r0,r1
ite hs ite hs
movhs r0,#1 movhs r0,#1
@@ -243,7 +344,7 @@ saving_func __aeabi_fcmpge
saving_func_return saving_func_return
float_wrapper_section __aeabi_fcmpgt float_wrapper_section __aeabi_fcmpgt
saving_func __aeabi_fcmpgt saving_func wrapper __aeabi_fcmpgt
dcp_fcmp_m apsr_nzcv,r0,r1 dcp_fcmp_m apsr_nzcv,r0,r1
ite hi ite hi
movhi r0,#1 movhi r0,#1

View File

@@ -471,17 +471,36 @@ float_section float2int
regular_func float2int regular_func float2int
shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim
float_section float2fix_z
// int32_t float2fix_z(float f, int e): float to signed fixed point, rounding towards zero.
// In: r0 = f, r1 = e (number of fractional bits); out: r0 = result.
// Non-negative inputs are passed straight to float2fix; negative inputs are
// converted as an unsigned magnitude and then negated.
regular_func float2fix_z
cmn r0, r0 // r0 + r0 carries out bit 31, so C = sign bit of f
bcc float2fix // sign clear: float2fix handles it unchanged
push {lr}
lsls r0, #1
lsrs r0, #1 // r0 = |f| (sign bit cleared); r1 = e is untouched
bl float2ufix_z
cmp r0, #0
bmi 1f // magnitude has bit 31 set, so it does not fit as a negated int32: clamp
negs r0, r0 // reapply the sign
pop {pc}
1:
movs r0, #128
lsls r0, #24 // r0 = 0x80000000
pop {pc}
float_section float2fix float_section float2fix
regular_func float2fix regular_func float2fix
shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim
float_section float2ufix float_section float2ufix
regular_func float2ufix regular_func float2ufix
regular_func float2ufix_z
table_tail_call SF_TABLE_FLOAT2UFIX table_tail_call SF_TABLE_FLOAT2UFIX
// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3] // unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3]
float_wrapper_section __aeabi_f2uiz float_wrapper_section __aeabi_f2uiz
wrapper_func __aeabi_f2uiz wrapper_func __aeabi_f2uiz
regular_func float2uint
regular_func float2uint_z regular_func float2uint_z
table_tail_call SF_TABLE_FLOAT2UINT table_tail_call SF_TABLE_FLOAT2UINT
@@ -530,10 +549,11 @@ wrapper_func __aeabi_f2lz
regular_func float2int64_z regular_func float2int64_z
cmn r0, r0 cmn r0, r0
bcc float2int64 bcc float2int64
movs r1, #0
float2fix64_z_neg:
push {lr} push {lr}
lsls r0, #1 lsls r0, #1
lsrs r0, #1 lsrs r0, #1
movs r1, #0
bl float2ufix64 bl float2ufix64
cmp r1, #0 cmp r1, #0
bmi 1f bmi 1f
@@ -553,17 +573,24 @@ regular_func float2int64
shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim
float_section float2fix64 float_section float2fix64
regular_func float2fix64_z
cmn r0, r0
bcs float2fix64_z_neg
// fall thru
regular_func float2fix64 regular_func float2fix64
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim
// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3] // unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3]
float_wrapper_section __aeabi_f2ulz float_wrapper_section __aeabi_f2ulz
wrapper_func __aeabi_f2ulz wrapper_func __aeabi_f2ulz
regular_func float2uint64
regular_func float2uint64_z regular_func float2uint64_z
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim
float_section float2ufix64 float_section float2ufix64
regular_func float2ufix64 regular_func float2ufix64
regular_func float2ufix64_z
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim
float_wrapper_section __aeabi_f2d float_wrapper_section __aeabi_f2d

View File

@@ -241,7 +241,52 @@ regular_func ufix642float
bxlo r14 bxlo r14
b 3b b 3b
float_wrapper_section conv_ftoi64 float_section conv_ftoi64
// int64_t float2int64(float f): float to int64, rounding towards -Infinity.
// In: r0 = f; out: r1:r0 = result (r0 = low word).
// Inputs for which truncation already gives the right answer are tail-branched
// to float2int64_z; the rest call it and then subtract 1.
// NOTE(review): there is no explicit denormal test here, so a negative denormal
// takes the "subtract 1" path - confirm that is the intended result.
regular_func float2int64
lsls r1, r0, #1
// the sign bit is now in C and r1 holds exponent:mantissa, so
// r0 = abs(zero) => r1 = 0x00000000
// r0 = abs(denormal) => r1 = 0x00xxxxxx
// r0 = abs(1.0f) => r1 = 0x7f000000
// r0 = abs(inf/nan) => r1 = 0xffxxxxxx
bls float2int64_z // positive or zero or -zero are ok for int64_z
lsrs r1, #24
subs r1, #0x7f
bcc 1f // <1 means subtract 1
// mask off all but fractional bits
lsls r2, r0, r1
lsls r2, #9
beq float2int64_z // integer
1:
push {lr}
bl float2int64_z
subs r0, #1
sbcs r1, r1, #0 // propagate the borrow into the high word
pop {pc}
float_section conv_ftof64
// int64_t float2fix64(float f, int e): float to signed 64-bit fixed point,
// rounding towards -Infinity.
// In: r0 = f, r1 = e (number of fractional bits); out: r1:r0 = result (r0 = low word).
// Inputs for which truncation already gives the right answer are tail-branched
// to float2fix64_z; the rest call it and then subtract 1.
regular_func float2fix64
lsls r2, r0, #1
// the sign bit is now in C and r2 holds exponent:mantissa, so
// r0 = abs(zero) => r2 = 0x00000000
// r0 = abs(denormal) => r2 = 0x00xxxxxx
// r0 = abs(1.0f) => r2 = 0x7f000000
// r0 = abs(inf/nan) => r2 = 0xffxxxxxx
bls float2fix64_z // positive or zero or -zero are ok for fix64_z
lsrs r2, #24
rsbs r3, r1, #0x7f // r3 = 0x7f - e
subs r2, r3 // r2 = exponent - 0x7f + e, i.e. unbiased exponent of f * 2^e
bcc 1f // <1 means subtract 1
// mask off all but fractional bits
lsls r2, r0, r2
lsls r2, #9
beq float2fix64_z // integer
1:
push {lr}
bl float2fix64_z
subs r0, #1
sbcs r1, r1, #0 // propagate the borrow into the high word
pop {pc}
float_wrapper_section conv_ftoi64z
@ convert float to signed int64, rounding towards 0, clamping @ convert float to signed int64, rounding towards 0, clamping
wrapper_func __aeabi_f2lz wrapper_func __aeabi_f2lz
@@ -318,7 +363,7 @@ regular_func float2uint64_z
movs r1,#0 @ fall through movs r1,#0 @ fall through
@ convert float in r0 to unsigned fixed point in r0:r1, clamping @ convert float in r0 to unsigned fixed point in r0:r1, clamping
regular_func float2ufix64 regular_func float2ufix64
//regular_func float2ufix64_z regular_func float2ufix64_z
subs r1,#0x96 @ remove exponent bias, compensate for mantissa length subs r1,#0x96 @ remove exponent bias, compensate for mantissa length
asrs r2,r0,#23 @ sign and exponent asrs r2,r0,#23 @ sign and exponent
sub r3,r2,#1 sub r3,r2,#1

View File

@@ -0,0 +1,106 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#if !PICO_RP2040
#include "pico/asm_helper.S"
pico_default_asm_setup
// Switch to the executable ("ax") section for \name: the one named by
// SECTION_NAME by default, or by RAM_SECTION_NAME when PICO_FLOAT_IN_RAM is set.
.macro float_section name
#if !PICO_FLOAT_IN_RAM
.section SECTION_NAME(\name), "ax"
#else
.section RAM_SECTION_NAME(\name), "ax"
#endif
.endm
// float int2float(int32_t i)
// In:  r0 = signed 32-bit integer
// Out: r0 = bit pattern of the converted single-precision value
// Scratch: s15
float_section int2float
regular_func int2float
    vmov            s15, r0         // integer into the FPU
    vcvt.f32.s32    s15, s15        // signed int32 -> float
    vmov            r0, s15         // result back to the core register
    bx              lr
// float uint2float(uint32_t i)
// In:  r0 = unsigned 32-bit integer
// Out: r0 = bit pattern of the converted single-precision value
// Scratch: s15
float_section uint2float
regular_func uint2float
    vmov            s15, r0         // integer into the FPU
    vcvt.f32.u32    s15, s15        // unsigned int32 -> float
    vmov            r0, s15         // result back to the core register
    bx              lr
// int32_t float2int(float f)
// In:  r0 = single-precision bit pattern
// Out: r0 = signed 32-bit integer, rounded towards -Infinity (VCVTM)
// Scratch: s15
float_section float2int
regular_func float2int
    vmov            s15, r0         // float into the FPU
    vcvtm.s32.f32   s15, s15        // float -> int32, round towards minus infinity
    vmov            r0, s15         // result back to the core register
    bx              lr
// int32_t float2int_z(float f)
// In:  r0 = single-precision bit pattern
// Out: r0 = signed 32-bit integer, rounded towards zero
// Scratch: s15
float_section float2int_z
regular_func float2int_z
    vmov            s15, r0         // float into the FPU
    vcvt.s32.f32    s15, s15        // float -> int32, round towards zero
    vmov            r0, s15         // result back to the core register
    bx              lr
// float2uint(float f) / float2uint_z(float f)
// In:  r0 = single-precision bit pattern
// Out: r0 = unsigned 32-bit integer
// Both names share this one entry point and the same conversion instruction.
// Scratch: s15
float_section float2uint
regular_func float2uint
regular_func float2uint_z
    vmov            s15, r0         // float into the FPU
    vcvt.u32.f32    s15, s15        // float -> uint32, round towards zero
    vmov            r0, s15         // result back to the core register
    bx              lr
float_section float2fix_z
// int32_t float2fix_z(float f, int e): float to signed fixed point, rounding towards zero.
// In: r0 = f, r1 = e (number of fractional bits); out: r0 = result.
// Adds e to the biased exponent field of f (clamped to 0..0xff) and then
// tail-branches to float2int_z.
regular_func float2fix_z
ubfx r2, r0, #23, #8 // r2 = biased exponent of f
adds r2, r1 // r2 = exponent + e
asrs r3, r2, #8 // r3 == 0 exactly when 0 <= r2 <= 0xff
beq 1f // in range: use r2 as is
ite pl
movpl r2, #0xff // sum too large: clamp to the maximum exponent field
movmi r2, #0 // sum negative: clamp to zero
1:
bfi r0, r2, #23, #8 // write the scaled exponent back into f
b float2int_z
// int32_t float2fix(float f, int e)
// In:  r0 = f (single-precision bit pattern), r1 = e (number of fractional bits)
// Out: r0 = f * 2^e as a signed 32-bit integer, rounded towards -Infinity
// Inputs for which truncation already gives the right answer (non-negative,
// -0, denormal, or integral after scaling) are tail-branched to float2fix_z.
// Negative non-integers call float2fix_z and then subtract 1.
float_section float2fix
regular_func float2fix
    lsls r2, r0, #1             // sign bit into C, exponent:mantissa in r2
    // r0 = abs(zero) => r2 = 0x00000000
    // r0 = abs(denormal) => r2 = 0x00xxxxxx
    // r0 = abs(1.0f) => r2 = 0x7f000000
    // r0 = abs(inf/nan) => r2 = 0xffxxxxxx
    bls float2fix_z             // input positive or zero or -zero are ok for fix_z
    lsrs r2, #24                // r2 = biased exponent
    beq float2fix_z             // input denormal will be flushed to zero
    rsbs r3, r1, #0x7f          // r3 = 0x7f - e
    subs r2, r3                 // r2 = unbiased exponent of f * 2^e
    bcc 1f                      // |f * 2^e| < 1.0 means we need to subtract 1
    // mask off all but fractional bits
    lsls r2, r0, r2
    lsls r2, #9
    beq float2fix_z             // scaled input is integer
1:
    push {lr}
    bl float2fix_z
    subs r0, #1                 // the result is 32-bit, so only r0 is adjusted
    pop {pc}
float_section float2ufix
// uint32_t float2ufix(float f, int e) / uint32_t float2ufix_z(float f, int e)
// In: r0 = f, r1 = e (number of fractional bits); out: r0 = result.
// Both names share this entry point. Adds e to the biased exponent field of f
// (clamped to 0..0xff) and then tail-branches to float2uint_z.
regular_func float2ufix
regular_func float2ufix_z
ubfx r2, r0, #23, #8 // r2 = biased exponent of f
adds r2, r1 // r2 = exponent + e
asrs r3, r2, #8 // r3 == 0 exactly when 0 <= r2 <= 0xff
beq 1f // in range: use r2 as is
ite pl
movpl r2, #0xff // sum too large: clamp to the maximum exponent field
movmi r2, #0 // sum negative: clamp to zero
1:
bfi r0, r2, #23, #8 // write the scaled exponent back into f
b float2uint_z
#endif

View File

@@ -21,68 +21,296 @@ extern "C" {
* *
* \brief Optimized single-precision floating point functions * \brief Optimized single-precision floating point functions
* *
* (Replacement) optimized implementations are provided for the following compiler built-ins * An application can take control of the floating point routines used in the application over and above what is provided by the compiler,
* and math library functions on Arm: * by depending on the pico_float library. A user might want to do this
* *
* - __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub, __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun, __aeabi_i2f, __aeabi_l2f, __aeabi_ui2f, __aeabi_ul2f, __aeabi_f2iz, __aeabi_f2lz, __aeabi_f2uiz, __aeabi_f2ulz, __aeabi_f2d, sqrtf, cosf, sinf, tanf, atan2f, expf, logf * 1. To use optimized software implementations provided by the RP2-series device's bootrom or the SDK
* - ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf * 2. To use optimized combined software/hardware implementations utilizing custom RP2-series hardware for acceleration
* - powintf, sincosf (GNU extensions) * 3. To control the amount of C compiler/library code bloat
* 4. To make sure no floating point is called at all
* *
* The following additional optimized functions are also provided: * The pico_float library comes in three main flavors:
* *
* - int2float, uint2float, int642float, uint642float, fix2float, ufix2float, fix642float, ufix642float * 1. `pico_float_none` - all floating point operations cause a \ref panic - no single-precision floating point code is included
* - float2fix, float2ufix, float2fix64, float2ufix64, float2int, float2uint, float2int64, float2uint64, float2int_z, float2int64_z, float2uint_z, float2uint64_z * 2. `pico_float_compiler` - no custom functions are provided; all single-precision floating point is handled by the C compiler/library
* - exp10f, sincosf, powintf * 3. `pico_float_pico` - the smallest and fastest available for the platform, along with additional functionality (e.g. fixed point conversions) which are detailed below
* *
* On RP2350 (Arm) the following additional functions are available; the _fast methods are faster but do not round correctly * The user can control which version they want (e.g. **pico_float_xxx** by either setting the CMake global variable
* `PICO_DEFAULT_FLOAT_IMPL=xxx`, or by using the CMake function `pico_set_float_implementation(<TARGET> xxx)`. Note that in the absence
* of either, pico_float_pico is used by default.
* *
* - float2fix64_z, fdiv_fast, fsqrt_fast, * \if rp2040_specific
* On RP2040, `pico_float_pico` uses optimized hand coded implementations from the bootrom and the SDK for both
* basic single-precision floating point operations and floating point math library functions. These implementations
* are generally faster and smaller than those provided by the C compiler/library, though they don't support all the features of a fully compliant
* floating point implementation; they are however usually fine for the majority of cases
* \endif
* *
* On RP2350 RISC-V, only a small number of compiler runtime functions are overridden with faster implementations: * \if rp2350_specific
* On Arm on RP2350, there are multiple options for `pico_float_pico`:
* *
* - __addsf3, __subsf3, __mulsf3 * 1. `pico_float_pico_vfp` - this library leaves basic C single-precision floating point operations to the compiler
* which can use inlined VFP (Arm FPU) code. Custom optimized versions of trigonometric and scientific functions are provided.
* No DCP (RP2350 Double co-processor) instructions are used.
* 2. `pico_float_pico_dcp` - this library prevents the compiler injecting inlined VFP code, and also implements
* all single-precision floating point operations in optimized DCP or M33 code. This option is not quite as fast
* as pico_float_pico_vfp, however it allows floating point operations without enabling the floating point co-processor
* on the CPU; this can be beneficial in certain circumstances, e.g. where leaving stack in tasks or interrupts
* for the floating point state is undesirable.
*
* Note: `pico_float_pico` is equivalent to `pico_float_pico_vfp` on RP2350, as this is the most sensible default
* \endif
*
* On Arm, (replacement) optimized implementations are provided for the following compiler built-ins
* and math library functions when using `_pico` variants of `pico_float`:
*
* - basic arithmetic: (except `pico_float_pico_vfp`)
*
* __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub
*
* - comparison: (except `pico_float_pico_vfp`)
*
* __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun
*
* - (u)int32 <-> float: (except `pico_float_pico_vfp`)
*
* __aeabi_i2f, __aeabi_ui2f, __aeabi_f2iz, __aeabi_f2uiz
*
* - (u)int64 <-> float: (except `pico_float_pico_vfp`)
*
* __aeabi_l2f, __aeabi_ul2f, __aeabi_f2lz, __aeabi_f2ulz
*
* - float -> double: (except `pico_float_pico_vfp`)
*
* __aeabi_f2d
*
* - basic trigonometric:
*
* sqrtf, cosf, sinf, tanf, atan2f, expf, logf
*
* - trigonometric and scientific
*
* ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf
*
* - GNU extensions:
*
* powintf, sincosf
*
* On Arm, the following additional optimized functions are also provided (when using `_pico` variants of `pico_float`):
*
* - Conversions to/from integer types:
*
* - (u)int -> float (round to nearest):
*
* int2float, uint2float, int642float, uint642float
*
* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they map to inline VFP code
*
* - float -> (u)int (round towards zero):
*
* float2int_z, float2uint_z, float2int64_z, float2uint64_z
*
* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they map to inline VFP code
*
* - float -> (u)int (round towards -infinity):
*
* float2int, float2uint, float2int64, float2uint64
*
* - Conversions to/from fixed point integers:
*
* - (u)fix -> float (round to nearest):
*
* fix2float, ufix2float, fix642float, ufix642float
*
* - float -> (u)fix (round towards zero):
*
* float2fix_z, float2ufix_z, float2fix64_z, float2ufix64_z
*
* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they can map to inline VFP code
* when the number of fractional bits is a compile time constant between 1 and 32
*
* - float -> (u)fix (round towards -infinity):
*
* float2fix, float2ufix, float2fix64, float2ufix64
*
* note: on `pico_float_pico_vfp` the 32-bit functions are also provided as C macros since they can map to inline VFP code
* when the number of fractional bits is a compile time constant between 1 and 32
*
* - Even faster versions of divide and square-root functions that do not round correctly: (`pico_float_pico_dcp` only)
*
* fdiv_fast, sqrtf_fast
*
* \if rp2350_specific
* On RISC-V, (replacement) optimized implementations are provided for the following compiler built-ins when using the `pico_float_pico`
* library (note that there are no variants of this library like there are on Arm):
*
* - basic arithmetic:
*
* __addsf3, __subsf3, __mulsf3
* \endif
*/ */
// None of these functions are available on RISC-V:
#if !defined(__riscv) || PICO_COMBINED_DOCS #if !defined(__riscv) || PICO_COMBINED_DOCS
float int2float(int32_t f); #if PICO_COMBINED_DOCS || !LIB_PICO_FLOAT_COMPILER
float uint2float(uint32_t f); float int2float(int32_t i);
float int642float(int64_t f); float uint2float(uint32_t i);
float uint642float(uint64_t f); float int642float(int64_t i);
float uint642float(uint64_t i);
float fix2float(int32_t m, int e); float fix2float(int32_t m, int e);
float ufix2float(uint32_t m, int e); float ufix2float(uint32_t m, int e);
float fix642float(int64_t m, int e); float fix642float(int64_t m, int e);
float ufix642float(uint64_t m, int e); float ufix642float(uint64_t m, int e);
// These methods round towards -Infinity. // These methods round towards 0, which IS the C way
int32_t float2fix(float f, int e);
uint32_t float2ufix(float f, int e);
int64_t float2fix64(float f, int e);
uint64_t float2ufix64(float f, int e);
int32_t float2int(float f);
uint32_t float2uint(float f);
int64_t float2int64(float f);
uint64_t float2uint64(float f);
// These methods round towards 0.
int32_t float2int_z(float f); int32_t float2int_z(float f);
int64_t float2int64_z(float f); int64_t float2int64_z(float f);
int32_t float2uint_z(float f); int32_t float2uint_z(float f);
int64_t float2uint64_z(float f); int64_t float2uint64_z(float f);
int32_t float2fix_z(float f, int e);
uint32_t float2ufix_z(float f, int e);
int64_t float2fix64_z(float f, int e);
uint64_t float2ufix64_z(float f, int e);
// These methods round towards -Infinity - which IS NOT the C way for negative numbers;
// as such the naming is not ideal, however is kept for backwards compatibility
int32_t float2int(float f);
uint32_t float2uint(float f);
int64_t float2int64(float f);
uint64_t float2uint64(float f);
int32_t float2fix(float f, int e);
uint32_t float2ufix(float f, int e);
int64_t float2fix64(float f, int e);
uint64_t float2ufix64(float f, int e);
#if LIB_PICO_FLOAT_PICO_VFP
// Dispatch for the 32-bit fixed point conversions: when the fractional bit count `e` is a compile time
// constant in the range 1..32 the inline VFP helper is used, otherwise the out-of-line function that has
// the same name as the macro is called (its name is spelled with ## inside each macro body)
#define fix2float(m, e) \
    __builtin_choose_expr(__builtin_constant_p(e), \
        (e) >= 1 && (e) <= 32 ? _fix2float_inline(m, e) : fix2 ## float(m, e), \
        fix2 ## float(m, e))
#define ufix2float(m, e) \
    __builtin_choose_expr(__builtin_constant_p(e), \
        (e) >= 1 && (e) <= 32 ? _ufix2float_inline(m, e) : ufix2 ## float(m, e), \
        ufix2 ## float(m, e))
#define float2fix_z(f, e) \
    __builtin_choose_expr(__builtin_constant_p(e), \
        (e) >= 1 && (e) <= 32 ? _float2fix_z_inline(f, e) : float2 ## fix_z(f, e), \
        float2 ## fix_z(f, e))
#define float2ufix_z(f, e) \
    __builtin_choose_expr(__builtin_constant_p(e), \
        (e) >= 1 && (e) <= 32 ? _float2ufix_z_inline(f, e) : float2 ## ufix_z(f, e), \
        float2 ## ufix_z(f, e))
#define float2fix(f, e) \
    __builtin_choose_expr(__builtin_constant_p(e), \
        (e) >= 1 && (e) <= 32 ? _float2fix_inline(f, e) : float2 ## fix(f, e), \
        float2 ## fix(f, e))
#define float2ufix(f, e) \
    __builtin_choose_expr(__builtin_constant_p(e), \
        (e) >= 1 && (e) <= 32 ? _float2ufix_inline(f, e) : float2 ## ufix(f, e), \
        float2 ## ufix(f, e))
// Inline signed 32-bit fixed point -> float conversion; `e` (the number of fractional bits) is passed
// to the instruction as an immediate, so it must be a compile time constant
#define _fix2float_inline(m, e) ({ \
    int32_t _fixed = (m); \
    float _result; \
    pico_default_asm( \
        "vmov %0, %1\n"             /* move the raw integer bits into the VFP register */ \
        "vcvt.f32.s32 %0, %0, %2\n" /* convert in place, treating the value as signed with e fractional bits */ \
        : "=t" (_result) \
        : "r" (_fixed), "i" (e) \
    ); \
    _result; \
})
// Inline unsigned 32-bit fixed point -> float conversion; `e` (the number of fractional bits) is passed
// to the instruction as an immediate, so it must be a compile time constant
#define _ufix2float_inline(m, e) ({ \
    uint32_t _fixed = (m); \
    float _result; \
    pico_default_asm( \
        "vmov %0, %1\n"             /* move the raw integer bits into the VFP register */ \
        "vcvt.f32.u32 %0, %0, %2\n" /* convert in place, treating the value as unsigned with e fractional bits */ \
        : "=t" (_result) \
        : "r" (_fixed), "i" (e) \
    ); \
    _result; \
})
// Inline float -> signed 32-bit fixed point conversion using the VFP fixed point VCVT instruction.
// `e` (the number of fractional bits) is passed as an immediate, so it must be a compile time constant.
// The conversion is done in place in the VFP register and the resulting bits are then moved to a core register.
#define _float2fix_z_inline(f, e) ({ \
    int32_t _m; \
    float _f = (f); \
    pico_default_asm( \
        "vcvt.s32.f32 %0, %0, %2\n" \
        "vmov %1, %0\n" \
        : "+t" (_f), "=r" (_m) \
        : "i" (e) \
    ); \
    _m; \
})
// Unsigned counterpart of _float2fix_z_inline (vcvt.u32.f32 instead of vcvt.s32.f32)
#define _float2ufix_z_inline(f, e) ({ \
    uint32_t _m; \
    float _f = (f); \
    pico_default_asm( \
        "vcvt.u32.f32 %0, %0, %2\n" \
        "vmov %1, %0\n" \
        : "+t" (_f), "=r" (_m) \
        : "i" (e) \
    ); \
    _m; \
})
// Inline float -> signed 32-bit fixed point conversion rounding towards -infinity.
// `e` (the number of fractional bits) must be a compile time constant as it is used as an immediate.
// The VFP conversion result is corrected by subtracting one when the input is negative and has
// fraction bits below the fixed point resolution. The raw float bits are examined in a core register
// (via the union) to make that decision.
// The result variable is int32_t so the expression has the same (signed) type as the float2fix() function.
#define _float2fix_inline(f, e) ({ \
    union { float _f; int32_t _i; } _u; \
    _u._f = (f); \
    int32_t rc; \
    uint32_t tmp; \
    pico_default_asm( \
        "vcvt.s32.f32 %0, %0, %4\n" \
        "vmov %2, %0\n" \
        "lsls %1, #1\n" /* drop the sign bit of the raw float into the carry flag */ \
        "bls 2f\n" /* positive or zero or -zero are ok with the result we have */ \
        "lsrs %3, %1, #24\n" /* tmp = biased exponent */ \
        "subs %3, #0x7f - %c4\n" /* tmp = biased exponent - (127 - e) */ \
        "bcc 1f\n" /* 0 < abs(f) * 2^e < 1, so need to round down */ \
        /* mask off all but fractional bits */ \
        "lsls %1, %3\n" \
        "lsls %1, #8\n" \
        "beq 2f\n" /* integers can round towards zero */ \
        "1:\n" \
        /* need to subtract 1 from the result to round towards -infinity... */ \
        /* this will never cause an overflow, because to get here we must have had a non integer/infinite value which */ \
        /* therefore cannot have been equal to INT32_MIN when rounded towards zero */ \
        "subs %2, #1\n" \
        "2:\n" \
        : "+t" (_u._f), "+r" (_u._i), "=r" (rc), "=r" (tmp) \
        : "i" (e) \
        : "cc" /* lsls/lsrs/subs all update the condition flags */ \
    ); \
    rc; \
})
// The unsigned round-towards--infinity conversion simply reuses the round-towards-zero helper.
// NOTE(review): this relies on both roundings giving the same unsigned result for every input, i.e. on
// negative inputs clamping to 0 in vcvt.u32.f32 - confirm against the instruction documentation
#define _float2ufix_inline(f, e) _float2ufix_z_inline((f), (e))
#endif
#if LIB_PICO_FLOAT_PICO_VFP
// may as well provide inline macros for VFP
// each macro is a plain C cast; the int -> float macros first cast the argument to the exact 32-bit
// integer type so the signedness of the conversion does not depend on the argument's own type
// NOTE(review): presumably the compiler turns these casts into inline VFP conversions - confirm
#define int2float(i) ((float)(int32_t)(i))
#define uint2float(i) ((float)(uint32_t)(i))
#define float2int_z(f) ((int32_t)(f))
#define float2uint_z(f) ((uint32_t)(f))
#endif
#endif
float exp10f(float x); float exp10f(float x);
void sincosf(float x, float *sinx, float *cosx); void sincosf(float x, float *sinx, float *cosx);
float powintf(float x, int y); float powintf(float x, int y);
#if !PICO_RP2040 || PICO_COMBINED_DOCS #if !PICO_RP2040 || PICO_COMBINED_DOCS
int64_t float2fix64_z(float f, int e);
float fdiv_fast(float n, float d); float fdiv_fast(float n, float d);
float fsqrt_fast(float f); float sqrtf_fast(float f);
#endif #endif
#endif #endif
#if defined(__riscv) || LIB_PICO_FLOAT_COMPILER
// when using the compiler or RISC-V, we provide as many functions as we trivially can - these will be efficient
// when using hard-float on Arm
// integer -> float: plain C conversions
static inline float int2float(int32_t i) {
    return (float) i;
}

static inline float uint2float(uint32_t i) {
    return (float) i;
}

static inline float int642float(int64_t i) {
    return (float) i;
}

static inline float uint642float(uint64_t i) {
    return (float) i;
}

// float -> integer: plain C casts (which truncate towards zero)
static inline int32_t float2int_z(float f) {
    return (int32_t) f;
}

static inline int64_t float2int64_z(float f) {
    return (int64_t) f;
}

// NOTE(review): the two unsigned conversions below are declared with signed return types, so the
// unsigned result is converted back to a signed value on return - confirm whether uint32_t/uint64_t
// return types were intended
static inline int32_t float2uint_z(float f) {
    return (int32_t) (uint32_t) f;
}

static inline int64_t float2uint64_z(float f) {
    return (int64_t) (uint64_t) f;
}
#endif
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@@ -85,3 +85,12 @@ filegroup(
name = "m33", name = "m33",
srcs = ["m33.c"], srcs = ["m33.c"],
) )
# TODO: Add these tests to the Bazel build.
# Until then the sources are only collected in a filegroup; no test/binary target is declared for them here.
filegroup(
name = "unsupported_tests",
srcs = [
"custom_double_funcs_test.c",
"custom_float_funcs_test.c",
],
)

View File

@@ -79,4 +79,31 @@ else ()
target_link_libraries(m33 pico_double pico_stdlib) target_link_libraries(m33 pico_double pico_stdlib)
pico_add_extra_outputs(m33) pico_add_extra_outputs(m33)
endif() endif()
endif() endif()
# Build custom_float_funcs_test / custom_double_funcs_test once per floating point implementation,
# producing one executable for each entry of FLOAT_TYPES / DOUBLE_TYPES
set(FLOAT_TYPES compiler pico)
set(DOUBLE_TYPES compiler pico)
if (PICO_RP2350 AND NOT PICO_RISCV)
    # the pico_vfp and pico_dcp float variants are only added for non RISC-V RP2350 builds
    list(APPEND FLOAT_TYPES pico_vfp pico_dcp)
endif()

foreach (FLOAT_TYPE IN LISTS FLOAT_TYPES)
    add_executable(custom_float_funcs_test_${FLOAT_TYPE}
            custom_float_funcs_test.c)
    pico_set_float_implementation(custom_float_funcs_test_${FLOAT_TYPE} ${FLOAT_TYPE})
    target_link_libraries(custom_float_funcs_test_${FLOAT_TYPE} PRIVATE
            pico_stdlib)
    pico_add_extra_outputs(custom_float_funcs_test_${FLOAT_TYPE})
    # the test's printf output always uses the compiler's printf
    pico_set_printf_implementation(custom_float_funcs_test_${FLOAT_TYPE} compiler)
endforeach ()

foreach (DOUBLE_TYPE IN LISTS DOUBLE_TYPES)
    add_executable(custom_double_funcs_test_${DOUBLE_TYPE}
            custom_double_funcs_test.c)
    pico_set_double_implementation(custom_double_funcs_test_${DOUBLE_TYPE} ${DOUBLE_TYPE})
    target_link_libraries(custom_double_funcs_test_${DOUBLE_TYPE} PRIVATE
            pico_stdlib)
    pico_add_extra_outputs(custom_double_funcs_test_${DOUBLE_TYPE})
    # the test's printf output always uses the compiler's printf
    pico_set_printf_implementation(custom_double_funcs_test_${DOUBLE_TYPE} compiler)
endforeach ()

View File

@@ -0,0 +1,515 @@
#include <stdio.h>
#include "pico/stdlib.h"
#include "pico/double.h"
#include "math.h"
#if 0
#define printf(...) ((void)0)
#endif
// What happens when a check fails: change the 0 to 1 to return from the enclosing function at the
// first failure; as written a failure sets `rc` (which must be in scope at the point of use) and carries on
#if 0
#define stop() return -1
#else
#define stop() rc=1
#endif

// Report the failed expression and its location when `x` is false
#define test_assert(x) ({ \
    if (!(x)) { \
        printf("Assertion failed: "); \
        puts(#x); \
        printf(" at " __FILE__ ":%d\n", __LINE__); \
        stop(); \
    } \
})

// The test_check* macros compare `x` with `expected` and on mismatch print both values, labelled
// with `msg`, before invoking stop(). Note that the arguments are evaluated again when printing.
#define test_checkd(x, expected, msg) ({ \
    if ((x) != (expected)) { \
        printf(" %s: %f != %f\n", msg, x, expected); \
        stop(); \
    } \
})
#define test_checki(x, expected, msg) ({ \
    if ((x) != (expected)) { \
        printf(" %s: %d != %d\n", msg, x, expected); \
        stop(); \
    } \
})
// both sides are compared as uint32_t
#define test_checku(x, expected, msg) ({ \
    if ((uint32_t)(x) != (uint32_t)(expected)) { \
        printf(" %s: %u != %u\n", msg, x, expected); \
        stop(); \
    } \
})
#define test_checki64(x, expected, msg) ({ \
    if ((x) != (expected)) { \
        printf(" %s: %lld != %lld\n", msg, (int64_t)(x), (int64_t)(expected)); \
        stop(); \
    } \
})
// both sides are compared as uint64_t
#define test_checku64(x, expected, msg) ({ \
    if ((uint64_t)(x) != (uint64_t)(expected)) { \
        printf(" %s: %llu != %llu\n", msg, (uint64_t)(x), (uint64_t)(expected)); \
        stop(); \
    } \
})
#if !(LIB_PICO_DOUBLE_COMPILER || defined(__riscv))
// Wrappers around the fixed point conversion functions with the number of fractional bits fixed
// at a compile time constant (the number in the function name).
static inline double fix2double_8(int32_t m) { return fix2double(m, 8); }
static inline double fix2double_12(int32_t m) { return fix2double(m, 12); }
static inline double fix2double_16(int32_t m) { return fix2double(m, 16); }
static inline double fix2double_24(int32_t m) { return fix2double(m, 24); }
static inline double fix2double_28(int32_t m) { return fix2double(m, 28); }
static inline double fix2double_32(int32_t m) { return fix2double(m, 32); }
// the unsigned conversion takes an unsigned mantissa
static inline double ufix2double_12(uint32_t m) { return ufix2double(m, 12); }
// double -> fixed point: take the double unmodified (an integer parameter would truncate it before
// the conversion) and hand back the integer result
static inline int32_t double2fix_12(double d) { return double2fix(d, 12); }
static inline uint32_t double2ufix_12(double d) { return double2ufix(d, 12); }
#endif
#if 1 && (LIB_PICO_DOUBLE_COMPILER || defined(__riscv))
// Each macro copies its argument into a local, passes that local through an empty asm statement as a
// read/write register operand, and then calls the function of the same name (spelled with ##) on it.
// NOTE(review): presumably this hides constant arguments from the compiler so the conversion is
// performed at run time rather than folded at compile time - confirm
#define double2int_z(f) ({ \
    double _d = f; \
    pico_default_asm_volatile("" : "+r" (_d)); \
    double2 ## int_z(_d); \
})
#define double2uint_z(f) ({ \
    double _d = f; \
    pico_default_asm_volatile("" : "+r" (_d)); \
    double2 ## uint_z(_d); \
})
#define double2int64_z(f) ({ \
    double _d = f; \
    pico_default_asm_volatile("" : "+r" (_d)); \
    double2 ## int64_z(_d); \
})
#define double2uint64_z(f) ({ \
    double _d = f; \
    pico_default_asm_volatile("" : "+r" (_d)); \
    double2 ## uint64_z(_d); \
})
#define int2double(i) ({ \
    int32_t _i = i; \
    pico_default_asm_volatile("" : "+r" (_i)); \
    int2 ## double(_i); \
})
#define uint2double(i) ({ \
    uint32_t _i = i; \
    pico_default_asm_volatile("" : "+r" (_i)); \
    uint2 ## double(_i); \
})
#define int642double(i) ({ \
    int64_t _i = i; \
    pico_default_asm_volatile("" : "+r" (_i)); \
    int642 ## double(_i); \
})
#define uint642double(i) ({ \
    uint64_t _i = i; \
    pico_default_asm_volatile("" : "+r" (_i)); \
    uint642 ## double(_i); \
})
#endif
int test() {
int rc = 0;
#if LIB_PICO_DOUBLE_PICO
printf(">>> Using PICO\n");
#endif
printf("int2double\n");
test_checkd(int2double(0), 0.0, "int2double1");
test_checkd(int2double(-1), -1.0, "int2double2");
test_checkd(int2double(1), 1.0, "int2double3");
test_checkd(int2double(INT32_MAX), 2147483647.0, "int2double4");
test_checkd(int2double(INT32_MIN), -2147483648.0, "int2double5");
// these have rounding behavior on float but not double
test_checkd(int2double(2147483391), 2147483391.0, "int2double6");
test_checkd(int2double(2147483391), 2147483391.0, "int2double7");
test_checkd(int2double(2147483457), 2147483457.0, "int2double8");
test_checkd(int2double(2147483483), 2147483483.0, "int2double9");
test_checkd(int2double(2147483584), 2147483584.0, "int2double10");
printf("uint2double\n");
test_checkd(uint2double(0), 0.0, "uint2double1");
test_checkd(uint2double(1), 1.0, "uint2double2");
test_checkd(uint2double(INT32_MAX), 2147483647.0, "uint2double3");
// todo test correct rounding around maximum precision
test_checkd(uint2double(UINT32_MAX), 4294967295.0, "uint2double4");
printf("int642double\n");
test_checkd(int642double(0), 0.0, "int642double1");
test_checkd(int642double(-1), -1.0, "int642double2");
test_checkd(int642double(1), 1.0, "int642double3");
test_checkd(int642double(INT32_MAX-1), 2147483646.0, "int642double4");
test_checkd(int642double(INT32_MAX), 2147483647.0, "int642double5");
test_checkd(int642double(INT32_MAX+1ll), 2147483648.0, "int642double6");
test_checkd(int642double(INT32_MIN-1ll), -2147483649.0, "int642double7");
test_checkd(int642double(INT32_MIN), -2147483648.0, "int642double8");
test_checkd(int642double(INT32_MIN+1ll), -2147483647.0, "int642double9");
// todo test correct rounding around maximum precision
test_checkd(int642double(INT64_MAX), 9223372036854775807.0, "int642double10");
test_checkd(int642double(INT64_MIN), -9223372036854775808.0, "int642doubl11e");
printf("uint642double\n");
test_checkd(uint642double(0), 0.0, "uint642double1");
test_checkd(uint642double(1), 1.0, "uint642double2");
test_checkd(uint642double(INT32_MAX-1), 2147483646.0, "uint642double3");
test_checkd(uint642double(INT32_MAX), 2147483647.0, "uint642double4");
test_checkd(uint642double(INT32_MAX+1ll), 2147483648.0, "uint642double5");
test_checkd(uint642double(INT64_MAX), 9223372036854775807.0, "uint642double6");
// todo test correct rounding around maximum precision
test_checkd(uint642double(UINT64_MAX), 18446744073709551615.0, "uint642double7");
union {
uint64_t u;
double d;
} u64d;
#if !(LIB_PICO_DOUBLE_COMPILER || defined(__riscv))
printf("fix2double\n");
// todo test correct rounding around maximum precision
test_checkd(fix2double(-3, 1), -1.5, "fix2double1");
test_checkd(fix2double(-3, 1), -1.5, "fix2double2");
test_checkd(fix2double(-3, -4), -48.0, "fix2double3");
printf("ufix2double\n");
// todo test correct rounding around maximum precision
test_checkd(ufix2double(0xa0000000, 30), 2.5, "ufix2double1");
test_checkd(ufix2double(3, -4), 48.0, "ufix2double2");
printf("fix64double\n");
// todo test correct rounding around maximum precision
test_checkd(fix642double(-0xa000000000ll, 38), -2.5, "fix642double1");
test_checkd(fix642double(-3, -34), -51539607552.0, "fix642double2");
printf("ufix642double\n");
// todo test correct rounding around maximum precision
test_checkd(ufix642double(0xa000000000ll, 38), 2.5, "ufix642double1");
test_checkd(ufix642double(3, -34), 51539607552.0, "fix64double2");
test_checkd(fix2double_8(128), 0.5, "fix2double_8_1");
test_checkd(fix2double_8(-128), -0.5, "fix2double_8_2");
test_checkd(fix2double_16(8192), 0.125, "fix2double_8_3");
test_checkd(fix2double_16(-8192), -0.125, "fix2double_8_4");
test_checkd(fix2double_24(3<<23), 1.5, "fix2double_8_5");
test_checkd(fix2double_24(-(3<<23)), -1.5, "fix2double_8_6");
printf("double2fix\n");
test_checki(double2fix(-0.5, 8), -0x80, "double2fix0");
test_checki(double2fix(3.5, 8), 0x380, "double2fix1");
test_checki(double2fix(-3.5, 8), -0x380, "double2fix2");
test_checki(double2fix(32768.0, 16), INT32_MAX, "double2fix3");
test_checki(double2fix(65536.0, 16), INT32_MAX, "double2fix4");
test_checki(double2fix(-65536.0, 16), INT32_MIN, "double2fix4b");
test_checki(double2fix(INFINITY, 16), INT32_MAX, "double2fix5");
test_checki(double2fix(-INFINITY, 16), INT32_MIN, "double2fix5b");
test_checki(double2fix(INFINITY, -16), INT32_MAX, "double2fix5c");
test_checki(double2fix(-INFINITY, -16), INT32_MIN, "double2fix5d");
test_checki(double2fix(3.24999, 2), 12, "double2fix6");
test_checki(double2fix(3.25, 2), 13, "double2fix7");
test_checki(double2fix(-3.24999, 2), -13, "double2fix8");
test_checki(double2fix(-3.25, 2), -13, "double2fix9");
test_checki(double2fix(-0.75, 1), -2, "double2fix10");
test_checki(double2fix(-3.0, -1), -2, "double2fix11"); // not very useful
test_checki(double2fix(0.0, 16), 0, "double2fix12");
test_checki(double2fix(-0.0, 16), 0, "double2fix13");
test_checki(double2fix(0.0, -16), 0, "double2fix14");
test_checki(double2fix(-0.0, -16), 0, "double2fix15");
printf("double2ufix\n");
test_checku(double2ufix(3.5, 8), 0x380, "double2ufix1");
test_checku(double2ufix(-3.5, 8), 0, "double2ufix2");
test_checku(double2ufix(32768.0, 16), 32768 << 16, "double2ufix3");
test_checku(double2ufix(65536.0, 16), UINT32_MAX, "double2ufix4");
test_checku(double2ufix(INFINITY, 16), UINT32_MAX, "double2ufix5");
test_checku(double2ufix(-INFINITY, 16), 0, "double2ufix5b");
test_checku(double2ufix(INFINITY, -16), UINT32_MAX, "double2ufix5c");
test_checku(double2ufix(-INFINITY, -16), 0, "double2ufix5d");
test_checku(double2ufix(3.24999, 2), 12, "double2ufix6");
test_checku(double2ufix(3.25, 2), 13, "double2ufix7");
test_checku(double2ufix(3.0, -1), 1, "double2ufix8"); // not very useful
test_checki(double2ufix(0.0, 16), 0, "double2ufix12");
test_checki(double2ufix(-0.0, 16), 0, "double2fix13");
test_checki(double2ufix(0.0, -16), 0, "double2ufix14");
test_checki(double2ufix(-0.0, -16), 0, "double2fix15");
printf("double2fix64\n");
test_checki64(double2fix64(3.5, 8), 0x380, "double2fix641");
test_checki64(double2fix64(-3.5, 8), -0x380, "double2fix642");
test_checki64(double2fix64(32768.0, 16), 32768ll << 16, "double2fix643");
test_checki64(double2fix64(65536.0, 16), 65536ll << 16, "double2fix644");
test_checki64(double2fix64(2147483648.0, 16), 2147483648ll << 16, "double2ufix644b");
test_checki64(double2fix64(65536.0 * 65536.0 * 32768.0, 16), INT64_MAX, "double2fix644c");
test_checki64(double2fix64(INFINITY, 16), INT64_MAX, "double2fix645");
test_checki64(double2fix64(-INFINITY, 16), INT64_MIN, "double2fix645b");
test_checki64(double2fix64(INFINITY, -16), INT64_MAX, "double2fix645c");
test_checki64(double2fix64(-INFINITY, -16), INT64_MIN, "double2fix645d");
test_checki64(double2fix64(3.24999, 2), 12, "double2fix646");
test_checki64(double2fix64(3.25, 2), 13, "double2fix647");
test_checki64(double2fix64(-3.24999, 2), -13, "double2fix648");
test_checki64(double2fix64(-3.25, 2), -13, "double2fix649");
test_checki64(double2fix64(-3.0, -1), -2, "double2fix6410"); // not very useful
test_checki64(double2fix64(2147483648.0 * 2147483648.0, 16), INT64_MAX, "double2ufix6411");
test_checki64(double2fix64(0.0, 16), 0, "double2fix6412");
test_checki64(double2fix64(-0.0, 16), 0, "double2fix6413");
test_checki64(double2fix64(0.0, -16), 0, "double2fix6412b");
test_checki64(double2fix64(-0.0, -16), 0, "double2fix6413b");
test_checki64(double2fix64(-3.25, 40), -13ll * (1ll << 38), "double2fix6414");
u64d.u = 0xc00a000000000001;
test_checki64(double2fix64(u64d.d, 40), -13ll * (1ll << 38) - 1ll, "double2fix6414b");
u64d.u = 0xc00a000080000001;
test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18) - 2ll, "double2fix6415c");
u64d.u = 0xc00a000080000000;
test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix6415d");
u64d.u = 0xc00a000000000001;
test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix6415e");
u64d.u = 0xc00a000000000000;
test_checki64(double2fix64(u64d.d, 20), -13ll * (1ll << 18), "double2fix6415g");
u64d.u = 0xc00a000080000001;
test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17) - 1ll, "double2fix6415h");
u64d.u = 0xc00a000080000000;
test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17) - 1ll, "double2fix6415i");
u64d.u = 0xc00a000000000001;
test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17) - 1ll, "double2fix6415j");
u64d.u = 0xc00a000000000000;
test_checki64(double2fix64(u64d.d, 19), -13ll * (1ll << 17), "double2fix6415k");
printf("double2ufix64\n");
test_checku64(double2ufix64(3.5, 8), 0x380, "double2ufix641");
test_checku64(double2ufix64(-3.5, 8), 0, "double2ufix642");
test_checku64(double2ufix64(32768.0, 16), 32768ull << 16, "double2ufix643");
test_checku64(double2ufix64(65536.0, 16), 65536ull << 16, "double2ufix644");
test_checku64(double2ufix64(2147483648.0, 16), 2147483648ull << 16, "double2ufix644b");
test_checku64(double2ufix64(INFINITY, 16), UINT64_MAX, "double2ufix645");
test_checku64(double2ufix64(-INFINITY, 16), 0, "double2ufix645b");
test_checku64(double2ufix64(INFINITY, -16), UINT64_MAX, "double2ufix645c");
test_checku64(double2ufix64(-INFINITY, -16), 0, "double2ufix645d");
test_checku64(double2ufix64(3.24999, 2), 12, "double2ufix646");
test_checku64(double2ufix64(3.25, 2), 13, "double2ufix647");
test_checku64(double2ufix64(3.0, -1), 1, "double2ufix648"); // not very useful
test_checki64(double2ufix64(0.0, 16), 0, "double2ufix649");
test_checki64(double2ufix64(-0.0, 16), 0, "double2ufix6410");
printf("double2fix_z\n");
test_checki(double2fix_z(3.5, 8), 0x380, "double2fix_z1");
test_checki(double2fix_z(-3.5, 8), -0x380, "double2fix_z2");
test_checki(double2fix_z(32768.0, 16), INT32_MAX, "double2fix_z3");
test_checki(double2fix_z(65536.0, 16), INT32_MAX, "double2fix_z4");
test_checki(double2fix_z(INFINITY, 16), INT32_MAX, "double2fix_z5");
test_checki(double2fix_z(-INFINITY, 16), INT32_MIN, "double2fix_z5b");
test_checki(double2fix_z(INFINITY, -50), INT32_MAX, "double2fix_z5c");
test_checki(double2fix_z(-INFINITY, -50), INT32_MIN, "double2fix_z5d");
test_checki(double2fix_z(3.24999, 2), 12, "double2fix_z6");
test_checki(double2fix_z(3.25, 2), 13, "double2fix_z7");
test_checki(double2fix_z(-3.24999, 2), -12, "double2fix_z8");
test_checki(double2fix_z(-3.25, 2), -13, "double2fix_z9");
test_checki(double2fix_z(-0.75, 1), -1, "double2fix_z10");
test_checki(double2fix_z(-3.0, -1), -1, "double2fix_z11"); // not very useful
test_checki(double2fix_z(0.0, 16), 0, "double2fix_z12");
test_checki(double2fix_z(-0.0, 16), 0, "double2fix_z13");
test_checki(double2fix_z(0.0, -16), 0, "double2fix_z12b");
test_checki(double2fix_z(-0.0, -16), 0, "double2fix_z13b");
printf("double2ufix_z\n");
test_checku(double2ufix_z(3.5, 8), 0x380, "double2ufix_z1");
test_checku(double2ufix_z(-3.5, 8), 0, "double2ufix_z2");
test_checku(double2ufix_z(32768.0, 16), 32768 << 16, "double2ufix_z3");
test_checku(double2ufix_z(65536.0, 16), UINT32_MAX, "double2ufix_z4");
test_checku(double2ufix_z(INFINITY, 16), UINT32_MAX, "double2ufix_z5");
test_checku(double2ufix_z(-INFINITY, 16), 0, "double2ufix_z5b");
test_checku(double2ufix_z(INFINITY, 16), UINT32_MAX, "double2ufix_z5c");
test_checku(double2ufix_z(-INFINITY, 16), 0, "double2ufix_z5d");
test_checku(double2ufix_z(3.24999, 2), 12, "double2ufix_z6");
test_checku(double2ufix_z(3.25, 2), 13, "double2ufix_z7");
test_checku(double2ufix_z(3.0, -1), 1, "double2ufix_z8"); // not very useful
test_checki(double2ufix_z(0.0, 16), 0, "double2fix_z9");
test_checki(double2ufix_z(-0.0, 16), 0, "double2fix_z10");
test_checki(double2ufix_z(0.0, -16), 0, "double2fix_z11");
test_checki(double2ufix_z(-0.0, -16), 0, "double2fix_z12");
printf("double2fix64_z\n");
test_checki64(double2fix64_z(3.5, 8), 0x380, "double2fix64_z1");
test_checki64(double2fix64_z(-3.5, 8), -0x380, "double2fix64_z2");
test_checki64(double2fix64_z(32768.0, 16), 32768ll << 16, "double2fix64_z3");
test_checki64(double2fix64_z(65536.0, 16), 65536ll << 16, "double2fix64_z4");
test_checki64(double2fix64_z(65536.0 * 65536.0 * 32768.0, 16), INT64_MAX, "double2fix64_z4b");
test_checki64(double2fix64_z(INFINITY, 16), INT64_MAX, "double2fix64_z5");
test_checki64(double2fix64_z(-INFINITY, 16), INT64_MIN, "double2fix64_z5");
test_checki64(double2fix64_z(INFINITY, 16), INT64_MAX, "double2fix64_z5");
test_checki64(double2fix64_z(-INFINITY, 16), INT64_MIN, "double2fix64_z5");
test_checki64(double2fix64_z(3.24999, 2), 12, "double2fix64_z6");
test_checki64(double2fix64_z(3.25, 2), 13, "double2fix64_z7");
test_checki64(double2fix64_z(-3.24999, 2), -12, "double2fix64_z8");
test_checki64(double2fix64_z(-3.25, 2), -13, "double2fix64_z9");
test_checki64(double2fix64_z(-3.0, -1), -1, "double2fix64_z10"); // not very useful
test_checki64(double2fix64_z(0.0, 16), 0, "double2fix64_z11");
test_checki64(double2fix64_z(-0.0, 16), 0, "double2fix64_z12");
test_checki64(double2fix64_z(0.0, -16), 0, "double2fix64_z13");
test_checki64(double2fix64_z(-0.0, -16), 0, "double2fix64_z14");
test_checki64(double2fix64_z(-3.25, 40), -13ll * (1ll << 38), "double2fix64_z15");
u64d.u = 0xc00a000000000001;
test_checki64(double2fix64_z(u64d.d, 40), -13ll * (1ll << 38), "double2fix64_z15b");
u64d.u = 0xc00a000080000001;
test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix64_z15c");
u64d.u = 0xc00a000080000000;
test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18) - 1ll, "double2fix64_z15d");
u64d.u = 0xc00a000000000001;
test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18), "double2fix64_z15e");
u64d.u = 0xc00a000000000000;
test_checki64(double2fix64_z(u64d.d, 20), -13ll * (1ll << 18), "double2fix64_z15g");
u64d.u = 0xc00a000080000001;
test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15h");
u64d.u = 0xc00a000080000000;
test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15i");
u64d.u = 0xc00a000000000001;
test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15j");
u64d.u = 0xc00a000000000000;
test_checki64(double2fix64_z(u64d.d, 19), -13ll * (1ll << 17), "double2fix64_z15k");
printf("double2ufix64_z\n");
test_checku64(double2ufix64_z(3.5, 8), 0x380, "double2ufix64_z1");
test_checku64(double2ufix64_z(-3.5, 8), 0, "double2ufix64_z2");
test_checku64(double2ufix64_z(32768.0, 16), 32768ll << 16, "double2ufix64_z3");
test_checku64(double2ufix64_z(65536.0, 16), 65536ll << 16, "double2ufix64_z4");
test_checki64(double2ufix64_z(65536.0 * 65536.0 * 65536.0, 16), UINT64_MAX, "double2fix64_z4b");
test_checku64(double2ufix64_z(INFINITY, 16), UINT64_MAX, "double2ufix64_z5");
test_checku64(double2ufix64_z(-INFINITY, 16), 0, "double2ufix64_z5b");
test_checku64(double2ufix64_z(INFINITY, 16), UINT64_MAX, "double2ufix64_z5c");
test_checku64(double2ufix64_z(-INFINITY, 16), 0, "double2ufix64_z5d");
test_checku64(double2ufix64_z(3.24999, 2), 12, "double2ufix64_z6");
test_checku64(double2ufix64_z(3.25, 2), 13, "double2ufix64_z7");
test_checki64(double2ufix64_z(3.0, -1), 1, "double2fuix64_z8"); // not very useful
test_checki64(double2ufix64_z(0.0, 16), 0, "double2ufix64_z9");
test_checki64(double2ufix64_z(-0.0, 16), 0, "double2ufix64_z10");
test_checki64(double2ufix64_z(0.0, -16), 0, "double2ufix64_z11");
test_checki64(double2ufix64_z(-0.0, -16), 0, "double2ufix64_z12");
printf("double2int\n");
test_checki(double2int(0.0), 0, "double2int1");
test_checki(double2int(0.25), 0, "double2int1b");
test_checki(double2int(0.5), 0, "double2int2");
test_checki(double2int(0.75), 0, "double2int2b");
test_checki(double2int(1.0), 1, "double2int3");
test_checki(double2int(-10.0), -10, "double2int3a");
test_checki(double2int(-0.0), 0, "double2int3b");
test_checki(double2int(-0.25), -1, "double2int4");
test_checki(double2int(-0.5), -1, "double2int4b");
test_checki(double2int(-0.75), -1, "double2int5");
test_checki(double2int(-1.0), -1, "double2int5b");
// todo test correct rounding around maximum precision
test_checki(double2int(2147483646.0), INT32_MAX-1, "double2int6");
test_checki(double2int(2147483647.0), INT32_MAX, "double2int6b");
test_checki(double2int(21474836470.0), INT32_MAX, "double2int7");
test_checki(double2int(-2147483648.0), INT32_MIN, "double2int8");
test_checki(double2int(-21474836480.0), INT32_MIN, "double2int9");
test_checki(double2int(-2.5), -3, "double2int10");
test_checki(double2int(-2.4), -3, "double2int11");
u64d.u = 0xc000000000000000ull;
test_checki(double2int(u64d.d), -2, "double2int12");
u64d.u = 0xc008000000000000ull;
test_checki(double2int(u64d.d), -3, "double2int12b");
u64d.u = 0xc000000000000001ull;
test_checki(double2int(u64d.d), -3, "double2int12c");
u64d.u = 0xc000000080000000ull;
test_checki(double2int(u64d.d), -3, "double2int12d");
u64d.u = 0xc000000100000000ull;
test_checki(double2int(u64d.d), -3, "double2int12e");
u64d.u = 0xc000000100000001ull;
test_checki(double2int(u64d.d), -3, "double2int12f");
test_checki(double2int(-2147483647.0), INT32_MIN+1, "double2int13");
test_checki(double2int(-2147483647.1), INT32_MIN, "double2int14");
test_checki(double2int(-2147483647.9), INT32_MIN, "double2int15");
test_checki(double2int(-2147483648.0), INT32_MIN, "double2int16");
test_checki(double2int(-2147483648.1), INT32_MIN, "double2int17");
test_checki(double2int(-21474836480.1), INT32_MIN, "double2int18");
printf("double2uint\n");
test_checku(double2uint(0.0), 0, "double2uint1");
test_checku(double2uint(0.25), 0, "double2uint2");
test_checku(double2uint(0.5), 0, "double2uint3");
test_checku(double2uint(0.75), 0, "double2uint4");
test_checku(double2uint(1.0), 1, "double2uint5");
test_checku(double2uint(2147483647.0), INT32_MAX, "double2uint6");
test_checku(double2uint(2147483648.0), INT32_MAX+1u, "double2uint7");
test_checku(double2uint(4294967294.5), UINT32_MAX-1, "double2uint8");
test_checku(double2uint(4294967295.0), UINT32_MAX, "double2uint9");
test_checku(double2uint(42949672950.0), UINT32_MAX, "double2uint10");
printf("double2int64\n");
test_checki64(double2int64(0.0), 0, "double2int641");
test_checki64(double2int64(0.25), 0, "double2int641b");
test_checki64(double2int64(0.5), 0, "double2int642");
test_checki64(double2int64(0.75), 0, "double2int642b");
test_checki64(double2int64(1.0), 1, "double2int643");
test_checki64(double2int64(-10.0), -10, "double2int643a");
test_checki64(double2int64(-0.0), 0, "double2int643b");
test_checki64(double2int64(-0.25), -1, "double2int644");
test_checki64(double2int64(-0.5), -1, "double2int644b");
test_checki64(double2int64(-0.75), -1, "double2int645");
test_checki64(double2int64(-1.0), -1, "double2int645b");
// todo test correct rounding around maximum precision
test_checki64(double2int64(2147483647.0), INT32_MAX, "double2int646");
test_checki64(double2int64(21474836470.0), 21474836470ll, "double2int647");
test_checki64(double2int64(-2147483648.0), INT32_MIN, "double2int648");
test_checki64(double2int64(-21474836480.0), -21474836480ll, "double2int649");
test_checki64(double2int64(-2.5), -3, "double2int6410");
test_checki64(double2int64(-2.4), -3, "double2int6411");
u64d.u = 0xc000000000000000ull;
test_checki64(double2int64(u64d.d), -2, "double2int6412");
u64d.u = 0xc008000000000000ull;
test_checki64(double2int64(u64d.d), -3, "double2int6412b");
u64d.u = 0xc000000000000001ull;
test_checki64(double2int64(u64d.d), -3, "double2int6412c");
u64d.u = 0xc000000080000000ull;
test_checki64(double2int64(u64d.d), -3, "double2int6412d");
u64d.u = 0xc000000100000000ull;
test_checki64(double2int64(u64d.d), -3, "double2int6412e");
u64d.u = 0xc000000100000001ull;
test_checki64(double2int64(u64d.d), -3, "double2int6412f");
printf("double2uint64\n");
test_checku64(double2uint64(0.0), 0, "double2uint641");
test_checku64(double2uint64(0.25), 0, "double2uint642");
test_checku64(double2uint64(0.5), 0, "double2uint643");
test_checku64(double2uint64(0.75), 0, "double2uint644");
test_checku64(double2uint64(1.0), 1, "double2uint645");
test_checku64(double2uint64(2147483647.0), INT32_MAX, "double2uint646");
test_checku64(double2uint64(2147483648.0), INT32_MAX+1u, "double2uint647");
// todo test correct rounding around maximum precision
test_checku64(double2uint64(4294967294.5), 4294967294ull, "double2uint648");
test_checku64(double2uint64(4294967295.0), 4294967295ull, "double2uint649");
test_checku64(double2uint64(42949672950.0), 42949672950, "double2uint6410");
#endif
// // These methods round towards 0.
printf("double2int_z\n");
test_checki(double2int_z(0.0), 0, "double2int_z1");
test_checki(double2int_z(0.25), 0, "double2int_z1b");
test_checki(double2int_z(0.5), 0, "double2int_z2");
test_checki(double2int_z(0.75), 0, "double2int_z2b");
test_checki(double2int_z(1.0), 1, "double2int_z3");
test_checki(double2int_z(-10.0), -10, "double2int_z3a");
test_checki(double2int_z(-0.0), 0, "double2int_z3b");
test_checki(double2int_z(-0.25), 0, "double2int_z4");
test_checki(double2int_z(-0.5), 0, "double2int_z4b");
test_checki(double2int_z(-0.75), 0, "double2int_z5");
test_checki(double2int_z(-1.0), -1, "double2int_z5b");
// todo test correct rounding around maximum precision
test_checki(double2int_z(2147483647.0), INT32_MAX, "double2int_z6");
test_checki(double2int_z(21474836470.0), INT32_MAX, "double2int_z7");
test_checki(double2int_z(-2147483648.0), INT32_MIN, "double2int_z8");
test_checki(double2int_z(-21474836480.0), INT32_MIN, "double2int_z9");
test_checki(double2int_z(-2.5), -2, "double2int_z10");
test_checki(double2int_z(-2.4), -2, "double2int_z11");
u64d.u = 0xc000000000000000ull;
test_checki(double2int_z(u64d.d), -2, "double2int_z12");
u64d.u = 0xc008000000000000ull;
test_checki(double2int_z(u64d.d), -3, "double2int_z12b");
u64d.u = 0xc000000000000001ull;
test_checki(double2int_z(u64d.d), -2, "double2int_z12c");
u64d.u = 0xc000000080000000ull;
test_checki(double2int_z(u64d.d), -2, "double2int_z12d");
u64d.u = 0xc000000100000000ull;
test_checki(double2int_z(u64d.d), -2, "double2int_z12e");
u64d.u = 0xc000000100000001ull;
test_checki(double2int_z(u64d.d), -2, "double2int_z12f");
printf("double2int64_z\n");
test_checki64(double2int64_z(0.0), 0, "double2int64_z1");
test_checki64(double2int64_z(0.25), 0, "double2int64_z1b");
test_checki64(double2int64_z(0.5), 0, "double2int64_z2");
test_checki64(double2int64_z(0.75), 0, "double2int64_z2b");
test_checki64(double2int64_z(1.0), 1, "double2int64_z3");
test_checki64(double2int64_z(-10.0), -10, "double2int64_z3a");
test_checki64(double2int64_z(-0.0), 0, "double2int64_z3b");
test_checki64(double2int64_z(-0.25), 0, "double2int64_z4");
test_checki64(double2int64_z(-0.5), 0, "double2int64_z4b");
test_checki64(double2int64_z(-0.75), 0, "double2int64_z5");
test_checki64(double2int64_z(-1.0), -1, "double2int64_z5b");
// todo test correct rounding around maximum precision
test_checki64(double2int64_z(2147483647.0), 2147483647ll, "double2int64_z6");
test_checki64(double2int64_z(21474836470.0), 21474836470ll, "double2int64_z7");
test_checki64(double2int64_z(-2147483648.0), INT32_MIN, "double2int64_z8");
test_checki64(double2int64_z(-21474836480.0), -21474836480ll, "double2int64_z9");
test_checki64(double2int64_z(-2.5), -2, "double2int64_z10");
test_checki64(double2int64_z(-2.4), -2, "double2int64_z11");
printf("double2uint_z\n");
test_checku(double2uint_z(0.0), 0, "double2uint_z1");
test_checku(double2uint_z(0.25), 0, "double2uint_z2");
test_checku(double2uint_z(0.5), 0, "double2uint_z3");
test_checku(double2uint_z(0.75), 0, "double2uint_z4");
test_checku(double2uint_z(1.0), 1, "double2uint_z5");
test_checku(double2uint_z(2147483647.0), INT32_MAX, "double2uint_z6");
test_checku(double2uint_z(2147483648.0), INT32_MAX+1u, "double2uint_z7");
// todo test correct rounding around maximum precision
test_checku(double2uint_z(4294967294.5), UINT32_MAX-1u, "double2uint_z8");
test_checku(double2uint_z(4294967295.0), UINT32_MAX, "double2uint_z9");
test_checku(double2uint_z(42949672950.0), UINT32_MAX, "double2uint_z10");
printf("double2uint64_z\n");
test_checku64(double2uint64_z(0.0), 0, "double2uint64_z1");
test_checku64(double2uint64_z(0.25), 0, "double2uint64_z2");
test_checku64(double2uint64_z(0.5), 0, "double2uint64_z3");
test_checku64(double2uint64_z(0.75), 0, "double2uint64_z4");
test_checku64(double2uint64_z(1.0), 1, "double2uint64_z5");
test_checku64(double2uint64_z(2147483647.0), INT32_MAX, "double2uint64_z6");
test_checku64(double2uint64_z(2147483648.0), INT32_MAX+1u, "double2uint64_z7");
// todo test correct rounding around maximum precision
test_checku64(double2uint64_z(4294967294.5), 4294967294ull, "double2uint64_z8");
test_checku64(double2uint64_z(4294967295.0), 4294967295ull, "double2uint64_z9");
test_checku64(double2uint64_z(4294967296.0), 4294967296ull, "double2uint64_z9b");
test_checku64(double2uint64_z(42949672950.0), 42949672950ull, "double2uint64_z10");
// double exp10(double x);
// void sincos(double x, double *sinx, double *cosx);
// double powint(double x, int y);
return rc;
}
// Entry point for the test program: initialise stdio, run test() and print
// an overall PASSED/FAILED line.
//
// Returns the value produced by test() (0 when every check passed, non-zero
// otherwise) so that an environment which inspects main's return value sees
// the failure as well as the printed text.
int main() {
    stdio_init_all();
    int rc = test();
    if (rc) {
        printf("FAILED\n");
    } else {
        printf("PASSED\n");
    }
    return rc;
}

View File

@@ -0,0 +1,402 @@
#include <stdio.h>
#include "pico/stdlib.h"
#include "pico/float.h"
#include "math.h"
// Change the 0 to 1 to compile out every printf in this file.
#if 0
#define printf(...) ((void)0)
#endif
// stop() is what a failed check executes. The active variant records the
// failure in a local variable named rc (which must be in scope at the point of
// use) and lets the remaining checks run; the disabled variant returns from
// the enclosing function at the first failure.
#if 0
#define stop() return -1
#else
#define stop() rc=1
#endif
// Check helpers. Each one is a GNU statement expression that evaluates its
// operands, and on mismatch prints "<msg>: <actual> != <expected>" and invokes
// stop(). Note that x and expected are evaluated more than once on failure.
//
// The printf arguments are cast to exactly the type named by the conversion
// specifier (as test_checki64/test_checku64 already did), so that results typed
// as int32_t/uint32_t - which need not be plain int/unsigned - match "%d"/"%u".
// The comparisons themselves are unchanged.
#define test_assert(x) ({ if (!(x)) { printf("Assertion failed: ");puts(#x);printf(" at " __FILE__ ":%d\n", __LINE__); stop(); } })
#define test_checkf(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %f != %f\n", msg, (double)(x), (double)(expected)); stop(); } })
#define test_checki(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %d != %d\n", msg, (int)(x), (int)(expected)); stop(); } })
#define test_checku(x, expected, msg) ({ if ((uint32_t)(x) != (uint32_t)(expected)) { printf(" %s: %u != %u\n", msg, (unsigned)(x), (unsigned)(expected)); stop(); } })
#define test_checki64(x, expected, msg) ({ if ((x) != (expected)) { printf(" %s: %lld != %lld\n", msg, (int64_t)(x), (int64_t)(expected)); stop(); } })
#define test_checku64(x, expected, msg) ({ if ((uint64_t)(x) != (uint64_t)(expected)) { printf(" %s: %llu != %llu\n", msg, (uint64_t)(x), (uint64_t)(expected)); stop(); } })
#if !(LIB_PICO_FLOAT_COMPILER || defined(__riscv))
// Convenience wrappers that bind the binary-point position (the second
// argument of the underlying conversion) to a constant. Only compiled when the
// fixed-point conversion functions are part of the build, i.e. when neither
// LIB_PICO_FLOAT_COMPILER nor __riscv is set.

// Signed fixed point -> float, with 8/12/16/24/28/32 fractional bits.
static inline float fix2float_8(int32_t m) { return fix2float(m, 8); }
static inline float fix2float_12(int32_t m) { return fix2float(m, 12); }
static inline float fix2float_16(int32_t m) { return fix2float(m, 16); }
static inline float fix2float_24(int32_t m) { return fix2float(m, 24); }
static inline float fix2float_28(int32_t m) { return fix2float(m, 28); }
static inline float fix2float_32(int32_t m) { return fix2float(m, 32); }
// Unsigned fixed point -> float: the mantissa is unsigned, so it must not be
// squeezed through a signed parameter.
static inline float ufix2float_12(uint32_t m) { return ufix2float(m, 12); }
// Float -> signed / unsigned fixed point with 12 fractional bits. These take
// the float and hand back the integer result; the previous int32_t-in/float-out
// signatures silently converted on both the way in and the way out.
static inline int32_t float2fix_12(float f) { return float2fix(f, 12); }
static inline uint32_t float2ufix_12(float f) { return float2ufix(f, 12); }
#endif
// When LIB_PICO_FLOAT_COMPILER is set or the target is RISC-V, every call to
// the conversions below is routed through a macro that first copies the
// argument into a local and passes that local through an empty asm statement as
// a read/write operand, before calling the real function with it.
#if 1 && (LIB_PICO_FLOAT_COMPILER || defined(__riscv))
// FREG is the asm operand constraint used for float values: "+r" when
// __SOFTFP__ is set or on RISC-V, "+t" otherwise.
// NOTE(review): "+t" is presumably GCC's ARM VFP single-precision register
// constraint - confirm against the GCC machine-constraints documentation.
#if __SOFTFP__ || defined(__riscv)
#define FREG "+r"
#else
#define FREG "+t"
#endif
// prevent the compiler from eliding the calculations
// The callee name is spelled with ## (e.g. float2 ## int_z) so the pasted
// token is the real function's name rather than a literal use of the macro's
// own name.
// float -> integer (round towards zero) wrappers: operand uses FREG.
#define float2int_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## int_z(_f); })
#define float2uint_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## uint_z(_f); })
#define float2int64_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## int64_z(_f); })
#define float2uint64_z(f) ({ float _f = f; pico_default_asm_volatile("" : FREG (_f)); float2 ## uint64_z(_f); })
// integer -> float wrappers: operand always uses the "+r" constraint.
#define int2float(i) ({ int32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int2 ## float(_i); })
#define uint2float(i) ({ uint32_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint2 ## float(_i); })
#define int642float(i) ({ int64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); int642 ## float(_i); })
#define uint642float(i) ({ uint64_t _i = i; pico_default_asm_volatile("" : "+r" (_i)); uint642 ## float(_i); })
#endif
#if 1 && LIB_PICO_FLOAT_VFP
// Remove the asm-barrier wrapper macros (if they were defined above) for the
// four 32-bit conversions, so that those names refer directly to whatever
// pico/float.h provides. The 64-bit wrappers are left in place.
// NOTE(review): this guard tests LIB_PICO_FLOAT_VFP, whereas test() reports the
// VFP build via LIB_PICO_FLOAT_PICO_VFP - confirm which symbol is intended.
#undef float2int_z
#undef float2uint_z
#undef int2float
#undef uint2float
#endif
// Runs every float conversion check in this file.
//
// Each test_check* line compares one conversion result with its expected value
// and, on mismatch, prints the label given as the last argument and marks the
// run as failed via stop(). The labels are what identifies a failing line in
// the output, so each one names the function actually being called.
//
// The large middle section (fixed-point conversions and the non-"_z" float to
// integer conversions) is only compiled when neither LIB_PICO_FLOAT_COMPILER
// nor __riscv is set; the integer -> float and "_z" checks always run.
//
// Returns 0 if every check passed, non-zero otherwise.
int test() {
    int rc = 0;
#if LIB_PICO_FLOAT_PICO_DCP
    printf(">>> Using DCP\n");
#endif
#if LIB_PICO_FLOAT_PICO_VFP
    printf(">>> Using VFP\n");
#endif
    printf("int2float\n");
    test_checkf(int2float(0), 0.0f, "int2float1");
    test_checkf(int2float(-1), -1.0f, "int2float2");
    test_checkf(int2float(1), 1.0f, "int2float3");
    test_checkf(int2float(INT32_MAX), 2147483647.0f, "int2float4");
    test_checkf(int2float(INT32_MIN), -2147483648.0f, "int2float5");
    // check rounding
    test_checkf(int2float(2147483391), 2147483392.0f, "int2float6");
    test_checkf(int2float(2147483456), 2147483392.0f, "int2float7");
    test_checkf(int2float(2147483457), 2147483520.0f, "int2float8");
    test_checkf(int2float(2147483483), 2147483520.0f, "int2float9");
    test_checkf(int2float(2147483584), 2147483648.0f, "int2float10");

    printf("uint2float\n");
    test_checkf(uint2float(0), 0.0f, "uint2float1");
    test_checkf(uint2float(1), 1.0f, "uint2float2");
    test_checkf(uint2float(INT32_MAX), 2147483647.0f, "uint2float3");
    // todo test correct rounding around maximum precision
    test_checkf(uint2float(UINT32_MAX), 4294967295.0f, "uint2float4");

    printf("int642float\n");
    test_checkf(int642float(0), 0.0f, "int642float1");
    test_checkf(int642float(-1), -1.0f, "int642float2");
    test_checkf(int642float(1), 1.0f, "int642float3");
    test_checkf(int642float(INT32_MAX-1), 2147483646.0f, "int642float4"); // note equality is within 1ulp
    test_checkf(int642float(INT32_MAX), 2147483647.0f, "int642float5"); // note equality is within 1ulp
    test_checkf(int642float(INT32_MAX+1ll), 2147483648.0f, "int642float6");
    test_checkf(int642float(INT32_MIN-1ll), -2147483649.0f, "int642float7"); // note equality is within 1ulp
    test_checkf(int642float(INT32_MIN), -2147483648.0f, "int642float8");
    test_checkf(int642float(INT32_MIN+1ll), -2147483647.0f, "int642float9"); // note equality is within 1ulp
    // todo test correct rounding around maximum precision
    test_checkf(int642float(INT64_MAX), 9223372036854775807.0f, "int642float10");
    test_checkf(int642float(INT64_MIN), -9223372036854775808.0f, "int642float11");

    printf("uint642float\n");
    test_checkf(uint642float(0), 0.0f, "uint642float1");
    test_checkf(uint642float(1), 1.0f, "uint642float2");
    test_checkf(uint642float(INT32_MAX-1), 2147483646.0f, "uint642float3"); // note equality is within 1ulp
    test_checkf(uint642float(INT32_MAX), 2147483647.0f, "uint642float4"); // note equality is within 1ulp
    test_checkf(uint642float(INT32_MAX+1ll), 2147483648.0f, "uint642float5");
    test_checkf(uint642float(INT64_MAX), 9223372036854775807.0f, "uint642float6");
    // todo test correct rounding around maximum precision
    test_checkf(uint642float(UINT64_MAX), 18446744073709551615.0f, "uint642float7");

#if !(LIB_PICO_FLOAT_COMPILER || defined(__riscv))
    // Lets a test build a float from an exact bit pattern. Declared inside
    // this conditional section because nothing outside it uses it.
    union {
        uint32_t u;
        float f;
    } u32f;

    printf("fix2float\n");
    // todo test correct rounding around maximum precision
    test_checkf(fix2float(-3, 1), -1.5f, "fix2float1");
    test_checkf(fix2float(-3, 1), -1.5f, "fix2float2");
    test_checkf(fix2float(-3, -4), -48.0f, "fix2float3");

    printf("ufix2float\n");
    // todo test correct rounding around maximum precision
    test_checkf(ufix2float(0xa0000000, 30), 2.5f, "ufix2float1");
    test_checkf(ufix2float(3, -4), 48.0f, "ufix2float2");

    printf("fix642float\n");
    // todo test correct rounding around maximum precision
    test_checkf(fix642float(-0xa000000000ll, 38), -2.5f, "fix642float1");
    test_checkf(fix642float(-3, -34), -51539607552.0f, "fix642float2");

    printf("ufix642float\n");
    // todo test correct rounding around maximum precision
    test_checkf(ufix642float(0xa000000000ll, 38), 2.5f, "ufix642float1");
    test_checkf(ufix642float(3, -34), 51539607552.0f, "ufix642float2");

    // fixed binary point wrappers defined at the top of this file
    test_checkf(fix2float_8(128), 0.5f, "fix2float_8_1");
    test_checkf(fix2float_8(-128), -0.5f, "fix2float_8_2");
    test_checkf(fix2float_16(8192), 0.125f, "fix2float_16_1");
    test_checkf(fix2float_16(-8192), -0.125f, "fix2float_16_2");
    test_checkf(fix2float_24(3<<23), 1.5f, "fix2float_24_1");
    test_checkf(fix2float_24(-(3<<23)), -1.5f, "fix2float_24_2");

    // The expected values in the non-"_z" sections below correspond to
    // rounding towards negative infinity (e.g. -3.24999 * 4 -> -13) and to
    // clamping at the limits of the result type.
    printf("float2fix\n");
    test_checki(float2fix(-0.5f, 8), -0x80, "float2fix0");
    test_checki(float2fix(3.5f, 8), 0x380, "float2fix1");
    test_checki(float2fix(-3.5f, 8), -0x380, "float2fix2");
    test_checki(float2fix(32768.0f, 16), INT32_MAX, "float2fix3");
    test_checki(float2fix(65536.0f, 16), INT32_MAX, "float2fix4");
    test_checki(float2fix(-65536.0f, 16), INT32_MIN, "float2fix4b");
    test_checki(float2fix(INFINITY, 16), INT32_MAX, "float2fix5");
    test_checki(float2fix(-INFINITY, 16), INT32_MIN, "float2fix5b");
    test_checki(float2fix(3.24999f, 2), 12, "float2fix6");
    test_checki(float2fix(3.25f, 2), 13, "float2fix7");
    test_checki(float2fix(-3.24999f, 2), -13, "float2fix8");
    test_checki(float2fix(-3.25f, 2), -13, "float2fix9");
    test_checki(float2fix(-0.75f, 1), -2, "float2fix10");
    test_checki(float2fix(-3.0f, -1), -2, "float2fix11"); // not very useful
    u32f.u = 0x7f012345;
    test_checki(float2fix(u32f.f, 1), INT32_MAX, "float2fix12");
    u32f.u = 0xff012345;
    test_checki(float2fix(u32f.f, 1), INT32_MIN, "float2fix13");

    printf("float2ufix\n");
    test_checku(float2ufix(3.5f, 8), 0x380, "float2ufix1");
    test_checku(float2ufix(-3.5f, 8), 0, "float2ufix2");
    // unsigned literal: 32768 << 16 does not fit in a signed int
    test_checku(float2ufix(32768.0f, 16), 32768u << 16, "float2ufix3");
    test_checku(float2ufix(65536.0f, 16), UINT32_MAX, "float2ufix4");
    test_checku(float2ufix(INFINITY, 16), UINT32_MAX, "float2ufix5");
    test_checku(float2ufix(3.24999f, 2), 12, "float2ufix6");
    test_checku(float2ufix(3.25f, 2), 13, "float2ufix7");
    test_checku(float2ufix(3.0f, -1), 1, "float2ufix8"); // not very useful

    printf("float2fix64\n");
    test_checki64(float2fix64(3.5f, 8), 0x380, "float2fix641");
    test_checki64(float2fix64(-3.5f, 8), -0x380, "float2fix642");
    test_checki64(float2fix64(32768.0f, 16), 32768ll << 16, "float2fix643");
    test_checki64(float2fix64(65536.0f, 16), 65536ll << 16, "float2fix644");
    test_checki64(float2fix64(2147483648.0f, 16), 2147483648ll << 16, "float2fix644b");
    test_checki64(float2fix64(65536.0f * 65536.0f * 32768.0f, 16), INT64_MAX, "float2fix644c");
    test_checki64(float2fix64(INFINITY, 16), INT64_MAX, "float2fix645");
    test_checki64(float2fix64(3.24999f, 2), 12, "float2fix646");
    test_checki64(float2fix64(3.25f, 2), 13, "float2fix647");
    test_checki64(float2fix64(-3.24999f, 2), -13, "float2fix648");
    test_checki64(float2fix64(-3.25f, 2), -13, "float2fix649");
    test_checki64(float2fix64(-3.0f, -1), -2, "float2fix6410"); // not very useful

    printf("float2ufix64\n");
    test_checku64(float2ufix64(3.5f, 8), 0x380, "float2ufix641");
    test_checku64(float2ufix64(-3.5f, 8), 0, "float2ufix642");
    test_checku64(float2ufix64(32768.0f, 16), 32768ull << 16, "float2ufix643");
    test_checku64(float2ufix64(65536.0f, 16), 65536ull << 16, "float2ufix644");
    test_checku64(float2ufix64(2147483648.0f, 16), 2147483648ull << 16, "float2ufix644b");
    test_checku64(float2ufix64(INFINITY, 16), UINT64_MAX, "float2ufix645");
    test_checku64(float2ufix64(3.24999f, 2), 12, "float2ufix646");
    test_checku64(float2ufix64(3.25f, 2), 13, "float2ufix647");
    test_checku64(float2ufix64(3.0f, -1), 1, "float2ufix648"); // not very useful

    // "_z" variants: expected values correspond to rounding towards zero.
    printf("float2fix_z\n");
    test_checki(float2fix_z(3.5f, 8), 0x380, "float2fix_z1");
    test_checki(float2fix_z(-3.5f, 8), -0x380, "float2fix_z2");
    test_checki(float2fix_z(32768.0f, 16), INT32_MAX, "float2fix_z3");
    test_checki(float2fix_z(65536.0f, 16), INT32_MAX, "float2fix_z4");
    test_checki(float2fix_z(INFINITY, 16), INT32_MAX, "float2fix_z5");
    test_checki(float2fix_z(-INFINITY, 16), INT32_MIN, "float2fix_z5b");
    test_checki(float2fix_z(3.24999f, 2), 12, "float2fix_z6");
    test_checki(float2fix_z(3.25f, 2), 13, "float2fix_z7");
    test_checki(float2fix_z(-3.24999f, 2), -12, "float2fix_z8");
    test_checki(float2fix_z(-3.25f, 2), -13, "float2fix_z9");
    test_checki(float2fix_z(-0.75f, 1), -1, "float2fix_z10");
    test_checki(float2fix_z(-3.0f, -1), -1, "float2fix_z11"); // not very useful
    u32f.u = 0x7f012345;
    test_checki(float2fix_z(u32f.f, 1), INT32_MAX, "float2fix_z12");
    u32f.u = 0xff012345;
    test_checki(float2fix_z(u32f.f, 1), INT32_MIN, "float2fix_z13");

    printf("float2ufix_z\n");
    test_checku(float2ufix_z(3.5f, 8), 0x380, "float2ufix_z1");
    test_checku(float2ufix_z(-3.5f, 8), 0, "float2ufix_z2");
    // unsigned literal: 32768 << 16 does not fit in a signed int
    test_checku(float2ufix_z(32768.0f, 16), 32768u << 16, "float2ufix_z3");
    test_checku(float2ufix_z(65536.0f, 16), UINT32_MAX, "float2ufix_z4");
    test_checku(float2ufix_z(INFINITY, 16), UINT32_MAX, "float2ufix_z5");
    test_checku(float2ufix_z(3.24999f, 2), 12, "float2ufix_z6");
    test_checku(float2ufix_z(3.25f, 2), 13, "float2ufix_z7");
    test_checku(float2ufix_z(3.0f, -1), 1, "float2ufix_z8"); // not very useful
    u32f.u = 0x7f012345;
    test_checku(float2ufix_z(u32f.f, 1), UINT32_MAX, "float2ufix_z9");
    u32f.u = 0xff012345;
    test_checku(float2ufix_z(u32f.f, 1), 0, "float2ufix_z10");

    printf("float2fix64_z\n");
    test_checki64(float2fix64_z(3.5f, 8), 0x380, "float2fix64_z1");
    test_checki64(float2fix64_z(-3.5f, 8), -0x380, "float2fix64_z2");
    test_checki64(float2fix64_z(32768.0f, 16), 32768ll << 16, "float2fix64_z3");
    test_checki64(float2fix64_z(65536.0f, 16), 65536ll << 16, "float2fix64_z4");
    test_checki64(float2fix64_z(65536.0f * 65536.0f * 32768.0f, 16), INT64_MAX, "float2fix64_z4b");
    test_checki64(float2fix64_z(INFINITY, 16), INT64_MAX, "float2fix64_z5");
    test_checki64(float2fix64_z(3.24999f, 2), 12, "float2fix64_z6");
    test_checki64(float2fix64_z(3.25f, 2), 13, "float2fix64_z7");
    test_checki64(float2fix64_z(-3.24999f, 2), -12, "float2fix64_z8");
    test_checki64(float2fix64_z(-3.25f, 2), -13, "float2fix64_z9");
    test_checki64(float2fix64_z(-3.0f, -1), -1, "float2fix64_z10"); // not very useful

    printf("float2ufix64_z\n");
    test_checku64(float2ufix64_z(3.5f, 8), 0x380, "float2ufix64_z1");
    test_checku64(float2ufix64_z(-3.5f, 8), 0, "float2ufix64_z2");
    test_checku64(float2ufix64_z(32768.0f, 16), 32768ll << 16, "float2ufix64_z3");
    test_checku64(float2ufix64_z(65536.0f, 16), 65536ll << 16, "float2ufix64_z4");
    // unsigned results are checked (and printed) with the unsigned helper
    test_checku64(float2ufix64_z(65536.0f * 65536.0f * 65536.0f, 16), UINT64_MAX, "float2ufix64_z4b");
    test_checku64(float2ufix64_z(INFINITY, 16), UINT64_MAX, "float2ufix64_z5");
    test_checku64(float2ufix64_z(3.24999f, 2), 12, "float2ufix64_z6");
    test_checku64(float2ufix64_z(3.25f, 2), 13, "float2ufix64_z7");
    test_checku64(float2ufix64_z(3.0f, -1), 1, "float2ufix64_z8"); // not very useful

    printf("float2int\n");
    test_checki(float2int(0.0f), 0, "float2int1");
    test_checki(float2int(0.25f), 0, "float2int1b");
    test_checki(float2int(0.5f), 0, "float2int2");
    test_checki(float2int(0.75f), 0, "float2int2b");
    test_checki(float2int(1.0f), 1, "float2int3");
    test_checki(float2int(-10.0f), -10, "float2int3a");
    test_checki(float2int(-0.0f), 0, "float2int3b");
    test_checki(float2int(-0.25f), -1, "float2int4");
    test_checki(float2int(-0.5f), -1, "float2int4b");
    test_checki(float2int(-0.75f), -1, "float2int5");
    test_checki(float2int(-1.0f), -1, "float2int5b");
    // todo test correct rounding around maximum precision
    test_checki(float2int(2147483647.0f), INT32_MAX, "float2int6");
    test_checki(float2int(21474836470.0f), INT32_MAX, "float2int7");
    test_checki(float2int(-2147483648.0f), INT32_MIN, "float2int8");
    test_checki(float2int(-21474836480.0f), INT32_MIN, "float2int9");
    test_checki(float2int(-2.5f), -3, "float2int10");
    test_checki(float2int(-2.4f), -3, "float2int11");

    printf("float2uint\n");
    test_checku(float2uint(0.0f), 0, "float2uint1");
    test_checku(float2uint(0.25f), 0, "float2uint2");
    test_checku(float2uint(0.5f), 0, "float2uint3");
    test_checku(float2uint(0.75f), 0, "float2uint4");
    test_checku(float2uint(1.0f), 1, "float2uint5");
    test_checku(float2uint(2147483647.0f), INT32_MAX+1u, "float2uint6"); // note loss of precision
    test_checku(float2uint(2147483648.0f), INT32_MAX+1u, "float2uint7");
    test_checku(float2uint(4294967294.5f), UINT32_MAX, "float2uint8"); // note loss of precision
    test_checku(float2uint(4294967295.0f), UINT32_MAX, "float2uint9");
    test_checku(float2uint(42949672950.0f), UINT32_MAX, "float2uint10");

    printf("float2int64\n");
    test_checki64(float2int64(0.0f), 0, "float2int641");
    test_checki64(float2int64(0.25f), 0, "float2int641b");
    test_checki64(float2int64(0.5f), 0, "float2int642");
    test_checki64(float2int64(0.75f), 0, "float2int642b");
    test_checki64(float2int64(1.0f), 1, "float2int643");
    test_checki64(float2int64(-10.0f), -10, "float2int643a");
    test_checki64(float2int64(-0.0f), 0, "float2int643b");
    test_checki64(float2int64(-0.25f), -1, "float2int644");
    test_checki64(float2int64(-0.5f), -1, "float2int644b");
    test_checki64(float2int64(-0.75f), -1, "float2int645");
    test_checki64(float2int64(-1.0f), -1, "float2int645b");
    // todo test correct rounding around maximum precision
    test_checki64(float2int64(2147483647.0f), INT32_MAX+1ll, "float2int646");
    test_checki64(float2int64(21474836470.0f), 21474836480ll, "float2int647"); // note loss of precision
    test_checki64(float2int64(-2147483648.0f), INT32_MIN, "float2int648");
    test_checki64(float2int64(-21474836480.0f), -21474836480ll, "float2int649");
    test_checki64(float2int64(-2.5f), -3, "float2int6410");
    test_checki64(float2int64(-2.4f), -3, "float2int6411");

    printf("float2uint64\n");
    test_checku64(float2uint64(0.0f), 0, "float2uint641");
    test_checku64(float2uint64(0.25f), 0, "float2uint642");
    test_checku64(float2uint64(0.5f), 0, "float2uint643");
    test_checku64(float2uint64(0.75f), 0, "float2uint644");
    test_checku64(float2uint64(1.0f), 1, "float2uint645");
    test_checku64(float2uint64(2147483647.0f), INT32_MAX+1u, "float2uint646"); // note loss of precision
    test_checku64(float2uint64(2147483648.0f), INT32_MAX+1u, "float2uint647");
    test_checku64(float2uint64(4294967294.5f), 4294967296ull, "float2uint648"); // note loss of precision
    test_checku64(float2uint64(4294967295.0f), 4294967296ull, "float2uint649"); // note loss of precision
    test_checku64(float2uint64(42949672950.0f), 42949672960ull, "float2uint6410"); // note loss of precision
#endif

    // These methods round towards 0.
    printf("float2int_z\n");
    test_checki(float2int_z(0.0f), 0, "float2int_z1");
    test_checki(float2int_z(0.25f), 0, "float2int_z1b");
    test_checki(float2int_z(0.5f), 0, "float2int_z2");
    test_checki(float2int_z(0.75f), 0, "float2int_z2b");
    test_checki(float2int_z(1.0f), 1, "float2int_z3");
    test_checki(float2int_z(-10.0f), -10, "float2int_z3a");
    test_checki(float2int_z(-0.0f), 0, "float2int_z3b");
    test_checki(float2int_z(-0.25f), 0, "float2int_z4");
    test_checki(float2int_z(-0.5f), 0, "float2int_z4b");
    test_checki(float2int_z(-0.75f), 0, "float2int_z5");
    test_checki(float2int_z(-1.0f), -1, "float2int_z5b");
    // todo test correct rounding around maximum precision
    test_checki(float2int_z(2147483647.0f), INT32_MAX, "float2int_z6");
    test_checki(float2int_z(21474836470.0f), INT32_MAX, "float2int_z7");
    test_checki(float2int_z(-2147483648.0f), INT32_MIN, "float2int_z8");
    test_checki(float2int_z(-21474836480.0f), INT32_MIN, "float2int_z9");
    test_checki(float2int_z(-2.5f), -2, "float2int_z10");
    test_checki(float2int_z(-2.4f), -2, "float2int_z11");

    printf("float2int64_z\n");
    test_checki64(float2int64_z(0.0f), 0, "float2int64_z1");
    test_checki64(float2int64_z(0.25f), 0, "float2int64_z1b");
    test_checki64(float2int64_z(0.5f), 0, "float2int64_z2");
    test_checki64(float2int64_z(0.75f), 0, "float2int64_z2b");
    test_checki64(float2int64_z(1.0f), 1, "float2int64_z3");
    test_checki64(float2int64_z(-10.0f), -10, "float2int64_z3a");
    test_checki64(float2int64_z(-0.0f), 0, "float2int64_z3b");
    test_checki64(float2int64_z(-0.25f), 0, "float2int64_z4");
    test_checki64(float2int64_z(-0.5f), 0, "float2int64_z4b");
    test_checki64(float2int64_z(-0.75f), 0, "float2int64_z5");
    test_checki64(float2int64_z(-1.0f), -1, "float2int64_z5b");
    test_checki64(float2int64_z(2147483647.0f), 2147483648ll, "float2int64_z6"); // note loss of precision
    test_checki64(float2int64_z(21474836470.0f), 21474836480ll, "float2int64_z7"); // note loss of precision
    test_checki64(float2int64_z(-2147483648.0f), INT32_MIN, "float2int64_z8");
    test_checki64(float2int64_z(-21474836480.0f), -21474836480ll, "float2int64_z9");
    test_checki64(float2int64_z(-2.5f), -2, "float2int64_z10");
    test_checki64(float2int64_z(-2.4f), -2, "float2int64_z11");

    printf("float2uint_z\n");
    test_checku(float2uint_z(0.0f), 0, "float2uint_z1");
    test_checku(float2uint_z(0.25f), 0, "float2uint_z2");
    test_checku(float2uint_z(0.5f), 0, "float2uint_z3");
    test_checku(float2uint_z(0.75f), 0, "float2uint_z4");
    test_checku(float2uint_z(1.0f), 1, "float2uint_z5");
    test_checku(float2uint_z(2147483647.0f), INT32_MAX+1u, "float2uint_z6"); // note loss of precision
    test_checku(float2uint_z(2147483648.0f), INT32_MAX+1u, "float2uint_z7");
    // todo test correct rounding around maximum precision
    test_checku(float2uint_z(4294967294.5f), UINT32_MAX, "float2uint_z8"); // note loss of precision
    test_checku(float2uint_z(4294967295.0f), UINT32_MAX, "float2uint_z9");
    test_checku(float2uint_z(42949672950.0f), UINT32_MAX, "float2uint_z10");

    printf("float2uint64_z\n");
    test_checku64(float2uint64_z(0.0f), 0, "float2uint64_z1");
    test_checku64(float2uint64_z(0.25f), 0, "float2uint64_z2");
    test_checku64(float2uint64_z(0.5f), 0, "float2uint64_z3");
    test_checku64(float2uint64_z(0.75f), 0, "float2uint64_z4");
    test_checku64(float2uint64_z(1.0f), 1, "float2uint64_z5");
    test_checku64(float2uint64_z(2147483647.0f), INT32_MAX+1u, "float2uint64_z6"); // note loss of precision
    test_checku64(float2uint64_z(2147483648.0f), INT32_MAX+1u, "float2uint64_z7");
    test_checku64(float2uint64_z(4294967294.5f), 4294967296ull, "float2uint64_z8"); // note loss of precision
    test_checku64(float2uint64_z(4294967295.0f), 4294967296ull, "float2uint64_z9"); // note loss of precision
    test_checku64(float2uint64_z(42949672950.0f), 42949672960ull, "float2uint64_z10"); // note loss of precision

    // Not covered yet:
    // float exp10f(float x);
    // void sincosf(float x, float *sinx, float *cosx);
    // float powintf(float x, int y);
    return rc;
}
// Entry point for the float conversion test program: initialise stdio, run
// test() and print an overall PASSED/FAILED line.
//
// Returns the value produced by test() (0 when every check passed, non-zero
// otherwise) so that an environment which inspects main's return value sees
// the failure as well as the printed text.
int main() {
    stdio_init_all();
    int rc = test();
    if (rc) {
        printf("FAILED\n");
    } else {
        printf("PASSED\n");
    }
    return rc;
}