11 namespace onnxruntime_float16 {
20 #elif defined(__GNUC__) || defined(__clang__)
21 little = __ORDER_LITTLE_ENDIAN__,
22 big = __ORDER_BIG_ENDIAN__,
25 #error onnxruntime_float16::detail::endian is not implemented in this environment.
31 "Only little-endian or big-endian native byte orders are supported.");
38 template <
class Derived>
93 return static_cast<int16_t
>(
val) < 0;
171 Derived
Abs() const noexcept {
// Creates an instance that represents the absolute value: the bit pattern
// returned by AbsImpl() is wrapped in a Derived through Derived::FromBits().
return Derived::FromBits(
AbsImpl()); }
188 return static_cast<uint16_t
>((lhs.val | rhs.val) & ~
kSignMask) == 0;
192 if (
IsNaN() || rhs.IsNaN()) {
196 return val == rhs.val;
202 if (
IsNaN() || rhs.IsNaN()) {
208 if (left_is_negative != rhs.IsNegative()) {
212 return left_is_negative && !
AreZero(*
this, rhs);
214 return (
val != rhs.val) && ((
val < rhs.val) ^ left_is_negative);
247 template <
class Derived>
255 constexpr
unsigned int sign_mask = 0x80000000u;
256 uint16_t
val =
static_cast<uint16_t
>(0x0u);
258 unsigned int sign =
f.u & sign_mask;
266 if (
f.u >= f16max.u) {
267 val = (
f.u > f32infty.u) ? 0x7e00 : 0x7c00;
269 if (
f.u < (113 << 23)) {
273 f.f += denorm_magic.f;
276 val =
static_cast<uint16_t
>(
f.u - denorm_magic.u);
278 unsigned int mant_odd = (
f.u >> 13) & 1;
287 val =
static_cast<uint16_t
>(
f.u >> 13);
291 val |=
static_cast<uint16_t
>(sign >> 16);
295 template <
class Derived>
298 constexpr
unsigned int shifted_exp = 0x7c00 << 13;
301 o.
u = (
val & 0x7fff) << 13;
302 unsigned int exp = shifted_exp & o.u;
303 o.u += (127 - 15) << 23;
306 if (exp == shifted_exp) {
307 o.u += (128 - 16) << 23;
308 }
else if (exp == 0) {
315 #if (defined _MSC_VER) && (defined _M_ARM || defined _M_ARM64 || defined _M_ARM64EC)
321 o.u |= (
val & 0x8000U) << 16U;
327 template <
class Derived>
384 return static_cast<int16_t
>(
val) < 0;
462 Derived
Abs() const noexcept {
// Creates an instance that represents the absolute value: the bit pattern
// returned by AbsImpl() is wrapped in a Derived through Derived::FromBits().
return Derived::FromBits(
AbsImpl()); }
482 return static_cast<uint16_t
>((lhs.val | rhs.val) & ~
kSignMask) == 0;
486 template <
class Derived>
490 result = kPositiveQNaNBits;
492 auto get_msb_half = [](
float fl) {
494 #ifdef __cpp_if_constexpr
495 if constexpr (detail::endian::native == detail::endian::little) {
497 if (detail::endian::native == detail::endian::little) {
499 std::memcpy(&result, reinterpret_cast<char*>(&fl) +
sizeof(uint16_t),
sizeof(uint16_t));
501 std::memcpy(&result, &fl,
sizeof(uint16_t));
506 uint16_t upper_bits = get_msb_half(
v);
512 U32 += (upper_bits & 1) + kRoundToNearest;
513 result = get_msb_half(F32);
518 template <
class Derived>
521 return std::numeric_limits<float>::quiet_NaN();
524 char*
const first =
reinterpret_cast<char*
>(&
result);
525 char*
const second = first +
sizeof(uint16_t);
526 #ifdef __cpp_if_constexpr
527 if constexpr (detail::endian::native == detail::endian::little) {
529 if (detail::endian::native == detail::endian::little) {
531 std::memset(first, 0,
sizeof(uint16_t));
532 std::memcpy(second, &
val,
sizeof(uint16_t));
534 std::memcpy(first, &
val,
sizeof(uint16_t));
535 std::memset(second, 0,
sizeof(uint16_t));
static constexpr uint16_t kOneBits
bool operator<(const Float16Impl &rhs) const noexcept
bool IsNaN() const noexcept
Tests if the value is NaN
Derived Abs() const noexcept
Creates an instance that represents absolute value.
Derived Negate() const noexcept
Creates a new instance with the sign flipped.
static constexpr uint16_t kOneBits
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
static constexpr uint16_t kPositiveQNaNBits
static constexpr uint16_t kSignaling_NaNBits
bool operator!=(const Float16Impl &rhs) const noexcept
static constexpr uint16_t kNegativeInfinityBits
bool operator==(const Float16Impl &rhs) const noexcept
float ToFloatImpl() const noexcept
Converts bfloat16 to float
static bool AreZero(const Float16Impl &lhs, const Float16Impl &rhs) noexcept
IEEE defines that positive and negative zero are equal; this gives us a quick equality check for two ...
**But if you need a result
bool IsNegative() const noexcept
Checks if the value is negative
static constexpr uint16_t kMaxValueBits
uint16_t NegateImpl() const noexcept
Creates a new instance with the sign flipped.
static constexpr uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation
bool IsFinite() const noexcept
Tests if the value is finite
static constexpr uint16_t kBiasedExponentMask
static constexpr uint16_t kMinValueBits
static constexpr uint16_t kNegativeQNaNBits
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
bool IsFinite() const noexcept
Tests if the value is finite
Shared implementation between public and internal classes. CRTP pattern.
bool IsNegative() const noexcept
Checks if the value is negative
static constexpr uint16_t kBiasedExponentMask
static constexpr uint16_t kNegativeInfinityBits
static constexpr uint16_t kMinusOneBits
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
float ToFloatImpl() const noexcept
Converts float16 to float
static constexpr uint16_t kEpsilonBits
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr uint16_t kPositiveQNaNBits
static constexpr uint16_t kMinusOneBits
IMATH_HOSTDEVICE constexpr int sign(T a) IMATH_NOEXCEPT
static constexpr uint16_t kSignMask
static constexpr uint16_t kMinValueBits
Derived Negate() const noexcept
Creates a new instance with the sign flipped.
uint16_t AbsImpl() const noexcept
Creates an instance that represents absolute value.
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
static constexpr uint16_t kNegativeQNaNBits
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Derived Abs() const noexcept
Creates an instance that represents absolute value.
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
static constexpr uint16_t kPositiveInfinityBits
static constexpr uint16_t kMaxValueBits
static constexpr uint16_t kPositiveInfinityBits
bool IsNaN() const noexcept
Tests if the value is NaN
static constexpr uint16_t kRoundToNearest
static constexpr uint16_t kSignMask
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
uint16_t AbsImpl() const noexcept
Creates an instance that represents absolute value.
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
static bool AreZero(const BFloat16Impl &lhs, const BFloat16Impl &rhs) noexcept
IEEE defines that positive and negative zero are equal; this gives us a quick equality check for two ...
IMATH_INTERNAL_NAMESPACE_HEADER_ENTER IMATH_HOSTDEVICE constexpr T abs(T a) IMATH_NOEXCEPT
uint16_t NegateImpl() const noexcept
Creates a new instance with the sign flipped.
static constexpr uint16_t kEpsilonBits
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Shared implementation between public and internal classes. CRTP pattern.
static uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t bfloat16 representation
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity