/*
 * Copyright (c) 2024 Raspberry Pi Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

// Test-vector generator: prints 1000 lines of the form
//     {0x<x>u, 0x<y>u, 0x<result>u},
// where x and y are pseudorandom single-precision bit patterns and result is
// the output of a software model of float add (or multiply, see main()).

// NOTE(review): the original listed four headers but their names were lost;
// the code below needs only these -- confirm the fourth against upstream.
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// xoshiro256++ pseudorandom number generator.
// Adapted from: https://prng.di.unimi.it/xoshiro256plusplus.c
// Original copyright notice:

/*  Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org)

To the extent possible under law, the author has dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.

See <http://creativecommons.org/publicdomain/zero/1.0/>. */

/* This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators.
   It has excellent (sub-ns) speed, a state (256 bits) that is large
   enough for any parallel application, and it passes all tests we are
   aware of.

   For generating just floating-point numbers, xoshiro256+ is even faster.

   The state must be seeded so that it is not everywhere zero. If you have
   a 64-bit seed, we suggest to seed a splitmix64 generator and use its
   output to fill s. */

// Single-precision bit-field masks (bit 31 sign, bits 30..23 exponent,
// bits 22..0 mantissa).
#define F32_EXP_MASK      UINT32_C(0x7f800000)
#define F32_MANT_MASK     UINT32_C(0x007fffff)
#define F32_SIGN_EXP_MASK UINT32_C(0xff800000)
// Bit pattern the model substitutes for any NaN result.
#define F32_GENERATED_NAN UINT32_C(0xffffffff)

// Rotate x left by k bits. The count is reduced modulo 64 so that k == 0 (or
// any multiple of 64) never produces an out-of-range shift.
static inline uint64_t xr256_rotl(const uint64_t x, int k)
{
    const unsigned int n = (unsigned int)k & 63u;
    return (x << n) | (x >> ((64u - n) & 63u));
}

// Advance the 256-bit generator state s in place and return the next 64-bit
// output. The state must not be all zero.
uint64_t xr256_next(uint64_t s[4])
{
    const uint64_t result = xr256_rotl(s[0] + s[3], 23) + s[0];

    const uint64_t t = s[1] << 17;

    s[2] ^= s[0];
    s[3] ^= s[1];
    s[1] ^= s[2];
    s[0] ^= s[3];

    s[2] ^= t;

    s[3] = xr256_rotl(s[3], 45);

    return result;
}

// Return the object representation of x as a 32-bit integer.
uint32_t bitcast_f2u(float x)
{
    // Reading a union member other than the one last stored reinterprets the
    // stored bytes; this is defined behaviour in C (though not in C++).
    union {
        float f;
        uint32_t u;
    } un;
    un.f = x;
    return un.u;
}

// Return the float whose object representation is the 32-bit integer x.
float bitcast_u2f(uint32_t x)
{
    union {
        float f;
        uint32_t u;
    } un;
    un.u = x;
    return un.f;
}

// True if the bit pattern x encodes a NaN: exponent field all ones and a
// non-zero mantissa.
bool is_nan_u(uint32_t x)
{
    return (x & F32_EXP_MASK) == F32_EXP_MASK && (x & F32_MANT_MASK) != 0;
}

// If the exponent field of x is zero (zero or denormal), clear the mantissa,
// leaving a zero that keeps the original sign. Other values pass through.
uint32_t flush_to_zero_u(uint32_t x)
{
    if ((x & F32_EXP_MASK) == 0) {
        x &= F32_SIGN_EXP_MASK;
    }
    return x;
}

// Shared tail of the model operations: replace any NaN with the model's
// generated-NaN pattern, then flush a denormal result to signed zero.
static uint32_t model_finish_u(uint32_t result)
{
    if (is_nan_u(result)) {
        result = F32_GENERATED_NAN;
    }
    return flush_to_zero_u(result);
}

// Model of single-precision addition on bit patterns: inputs are flushed to
// zero, the sum is computed with the host's float arithmetic, and the result
// is canonicalised by model_finish_u().
uint32_t model_fadd(uint32_t x, uint32_t y)
{
    x = flush_to_zero_u(x);
    y = flush_to_zero_u(y);
    // Use local hardware implementation to perform calculation
    return model_finish_u(bitcast_f2u(bitcast_u2f(x) + bitcast_u2f(y)));
}

// Model of single-precision multiplication on bit patterns; same input and
// result handling as model_fadd().
uint32_t model_fmul(uint32_t x, uint32_t y)
{
    x = flush_to_zero_u(x);
    y = flush_to_zero_u(y);
    // Use local hardware implementation to perform calculation
    return model_finish_u(bitcast_f2u(bitcast_u2f(x) * bitcast_u2f(y)));
}

int main(void)
{
    // SHA-256 of a rude word
    uint64_t rand_state[4] = {
        0x5891b5b522d5df08u,
        0x6d0ff0b110fbd9d2u,
        0x1bb4fc7163af34d0u,
        0x8286a2e846f6be03u
    };

    for (int i = 0; i < 1000; ++i) {
        // Keep the low 32 bits of two successive generator outputs.
        uint32_t x = (uint32_t)(xr256_next(rand_state) & 0xffffffffu);
        uint32_t y = (uint32_t)(xr256_next(rand_state) & 0xffffffffu);

        // Map nan to +-inf (input nans should already be well-covered)
        if (is_nan_u(x)) {
            x &= F32_SIGN_EXP_MASK;
        }
        if (is_nan_u(y)) {
            y &= F32_SIGN_EXP_MASK;
        }

        // Compile-time selection of the modelled operation: 1 = add,
        // 0 = multiply.
#if 1
        const uint32_t result = model_fadd(x, y);
#else
        const uint32_t result = model_fmul(x, y);
#endif
        printf("{0x%08" PRIx32 "u, 0x%08" PRIx32 "u, 0x%08" PRIx32 "u},\n",
               x, y, result);
    }

    return 0;
}