mirror of
https://github.com/raspberrypi/pico-sdk.git
synced 2025-08-06 06:02:39 +03:00
* Add fast single-precision add/sub/mul for Hazard3 * Make test output less noisy. Map -nan to -inf in vector gen. Move random vectors to separate files. * Re-disable USB stdout for pico_float_test by default... * Disable pico/float.h exports on RISC-V as these functions aren't implemented * Add hazard3 instructions to asm_helper. Split hazard3.h to support this. You can still include hazard3.h to get everything. This just allows you to pull in less.
137 lines
3.2 KiB
C
137 lines
3.2 KiB
C
/*
|
|
* Copyright (c) 2024 Raspberry Pi Ltd.
|
|
*
|
|
* SPDX-License-Identifier: BSD-3-Clause
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <fenv.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
|
|
// xoshiro256++ pseudorandom number generator.
|
|
// Adapted from: https://prng.di.unimi.it/xoshiro256plusplus.c
|
|
// Original copyright notice:
|
|
|
|
/* Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org)
|
|
|
|
To the extent possible under law, the author has dedicated all copyright
|
|
and related and neighboring rights to this software to the public domain
|
|
worldwide. This software is distributed without any warranty.
|
|
|
|
See <http://creativecommons.org/publicdomain/zero/1.0/>. */
|
|
|
|
/* This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.
|
|
It has excellent (sub-ns) speed, a state (256 bits) that is large
|
|
enough for any parallel application, and it passes all tests we are
|
|
aware of.
|
|
|
|
For generating just floating-point numbers, xoshiro256+ is even faster.
|
|
|
|
The state must be seeded so that it is not everywhere zero. If you have
|
|
a 64-bit seed, we suggest to seed a splitmix64 generator and use its
|
|
output to fill s. */
|
|
|
|
/*
 * Rotate the 64-bit value x left by k bit positions.
 *
 * The rotate count is reduced modulo 64, so k == 0 (or any multiple of 64)
 * returns x unchanged instead of evaluating a shift by 64, which is
 * undefined behaviour in C.
 */
static inline uint64_t xr256_rotl(const uint64_t x, int k) {
    const unsigned int left = (unsigned int)k & 63u;
    // Masking keeps the complementary right-shift in 0..63 even when left == 0
    const unsigned int right = (64u - left) & 63u;
    return (x << left) | (x >> right);
}
|
|
|
|
/*
 * Advance the xoshiro256++ generator by one step.
 *
 * s: the four 64-bit state words; they are updated in place.
 *
 * Returns the 64-bit output, which is computed from the state as it was on
 * entry (before the update).
 */
uint64_t xr256_next(uint64_t s[4]) {
    // Work on local copies and store the new state back at the end
    uint64_t w0 = s[0];
    uint64_t w1 = s[1];
    uint64_t w2 = s[2];
    uint64_t w3 = s[3];

    const uint64_t output = xr256_rotl(w0 + w3, 23) + w0;
    // Must be taken from the old value of word 1, before it is mixed below
    const uint64_t shifted = w1 << 17;

    // The order of these xors matters: each one reads words updated above it
    w2 ^= w0;
    w3 ^= w1;
    w1 ^= w2;
    w0 ^= w3;

    w2 ^= shifted;
    w3 = xr256_rotl(w3, 45);

    s[0] = w0;
    s[1] = w1;
    s[2] = w2;
    s[3] = w3;

    return output;
}
|
|
/*
 * Return the raw 32-bit pattern stored in a float.
 *
 * The value is stored into one union member and read back through the other,
 * so the bytes are reinterpreted rather than numerically converted. Reading a
 * union member other than the one last written is permitted in C (it is not
 * in C++).
 */
uint32_t bitcast_f2u(float x) {
    const union {
        float f;
        uint32_t u;
    } pun = { .f = x };
    return pun.u;
}
|
|
|
|
/*
 * Return the float whose stored 32-bit pattern is x.
 *
 * This is the inverse of bitcast_f2u: the integer is stored into one union
 * member and read back through the float member, reinterpreting the bytes
 * without any numeric conversion.
 */
float bitcast_u2f(uint32_t x) {
    const union {
        float f;
        uint32_t u;
    } pun = { .u = x };
    return pun.f;
}
|
|
|
|
/*
 * Test whether a 32-bit pattern encodes a single-precision NaN: the 8-bit
 * exponent field (bits 30:23) is all ones and the 23-bit mantissa field
 * (bits 22:0) is nonzero. The sign bit is ignored.
 */
bool is_nan_u(uint32_t x) {
    const uint32_t exponent = (x >> 23) & 0xffu;
    const uint32_t mantissa = x & 0x007fffffu;
    if (exponent != 0xffu) {
        return false;
    }
    // All-ones exponent with a zero mantissa is an infinity, not a NaN
    return mantissa != 0;
}
|
|
|
|
/*
 * Flush a subnormal single-precision bit pattern to a zero of the same sign.
 *
 * When the exponent field (bits 30:23) is zero, the mantissa bits are cleared
 * and only the sign bit survives. Any other pattern is returned unchanged.
 */
uint32_t flush_to_zero_u(uint32_t x) {
    const uint32_t exponent_mask = 0x7f800000u;
    const uint32_t sign_and_exponent_mask = 0xff800000u;
    if ((x & exponent_mask) != 0) {
        return x;
    }
    return x & sign_and_exponent_mask;
}
|
|
|
|
/*
 * Reference model of single-precision addition on raw bit patterns.
 *
 * x, y: operand bit patterns. Subnormal operands are flushed to signed zero
 *       before the addition.
 *
 * Returns the bit pattern of the sum as computed by the host's float
 * arithmetic, with any NaN result replaced by the all-ones pattern and a
 * subnormal result flushed to signed zero.
 */
uint32_t model_fadd(uint32_t x, uint32_t y) {
    const float lhs = bitcast_u2f(flush_to_zero_u(x));
    const float rhs = bitcast_u2f(flush_to_zero_u(y));
    // The host's own float addition does the actual calculation
    uint32_t sum_bits = bitcast_f2u(lhs + rhs);
    // Every generated NaN is reported as the canonical all-ones pattern
    sum_bits = is_nan_u(sum_bits) ? -1u : sum_bits;
    return flush_to_zero_u(sum_bits);
}
|
|
|
|
/*
 * Reference model of single-precision multiplication on raw bit patterns.
 *
 * x, y: operand bit patterns. Subnormal operands are flushed to signed zero
 *       before the multiplication.
 *
 * Returns the bit pattern of the product as computed by the host's float
 * arithmetic, with any NaN result replaced by the all-ones pattern and a
 * subnormal result flushed to signed zero.
 */
uint32_t model_fmul(uint32_t x, uint32_t y) {
    const float lhs = bitcast_u2f(flush_to_zero_u(x));
    const float rhs = bitcast_u2f(flush_to_zero_u(y));
    // The host's own float multiplication does the actual calculation
    uint32_t product_bits = bitcast_f2u(lhs * rhs);
    // Every generated NaN is reported as the canonical all-ones pattern
    product_bits = is_nan_u(product_bits) ? -1u : product_bits;
    return flush_to_zero_u(product_bits);
}
|
|
|
|
/*
 * Print 1000 pseudorandom test vectors to stdout, one per line, in the form
 * "{0x<x>u, 0x<y>u, 0x<expected>u}," where x and y are operand bit patterns
 * and expected is the model's result for them.
 *
 * The preprocessor switch in the loop selects whether the vectors are for
 * addition (model_fadd) or multiplication (model_fmul).
 */
int main(void) {
    // Fixed seed, so every run generates the same vectors.
    // SHA-256 of a rude word
    uint64_t rand_state[4] = {
        0x5891b5b522d5df08u,
        0x6d0ff0b110fbd9d2u,
        0x1bb4fc7163af34d0u,
        0x8286a2e846f6be03u
    };
    for (int i = 0; i < 1000; ++i) {
        uint32_t x, y;
        // Only the low 32 bits of each 64-bit generator output are used
        x = xr256_next(rand_state) & 0xffffffffu;
        y = xr256_next(rand_state) & 0xffffffffu;
        // Map nan to +-inf (input nans should already be well-covered):
        // clearing the mantissa of a NaN leaves an infinity of the same sign
        if (is_nan_u(x)) {
            x &= -1u << 23;
        }
        if (is_nan_u(y)) {
            y &= -1u << 23;
        }
        // Change to "#if 0" to generate multiplication vectors instead
#if 1
        const uint32_t expected = model_fadd(x, y);
#else
        const uint32_t expected = model_fmul(x, y);
#endif
        // uint32_t need not be unsigned int, so passing it to %x is a
        // format/type mismatch on some platforms. unsigned long is at least
        // 32 bits wide everywhere, and the printed text is unchanged.
        printf("{0x%08lxu, 0x%08lxu, 0x%08lxu},\n",
               (unsigned long)x, (unsigned long)y, (unsigned long)expected);
    }
    return 0;
}
|