HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
onnxruntime_float16.h
Go to the documentation of this file.
1 // Copyright (c) Microsoft Corporation. All rights reserved.
2 // Licensed under the MIT License.
3 
4 #pragma once
5 
6 #include <stdint.h>
7 #include <cmath>
8 #include <cstring>
9 #include <limits>
10 
11 namespace onnxruntime_float16 {
12 
namespace detail {

/// <summary>
/// Compile-time description of the byte order of the target platform.
/// <c>native</c> takes the same value as <c>little</c> or <c>big</c>,
/// depending on what the toolchain reports.
/// </summary>
enum class endian {
#if defined(_WIN32)
  // No byte-order macro is consulted here: this branch hard-codes little-endian.
  little = 0,
  big = 1,
  native = little,
#elif defined(__GNUC__) || defined(__clang__)
  // GCC and Clang expose the byte order through predefined macros.
  little = __ORDER_LITTLE_ENDIAN__,
  big = __ORDER_BIG_ENDIAN__,
  native = __BYTE_ORDER__,
#else
#error onnxruntime_float16::detail::endian is not implemented in this environment.
#endif
};

// The conversion helpers in this header only distinguish "little" from
// "not little", so any other byte order must be rejected at compile time.
static_assert(
    endian::native == endian::little || endian::native == endian::big,
    "Only little-endian or big-endian native byte orders are supported.");

}  // namespace detail
34 
/// <summary>
/// Shared implementation between public and internal classes. CRTP pattern.
/// Stores the raw 16-bit pattern of a half-precision value in <c>val</c> and
/// implements classification and comparison directly on those bits.
/// Abs() and Negate() call <c>Derived::FromBits(uint16_t)</c>, so the derived
/// type must provide that static factory.
/// </summary>
template <class Derived>
struct Float16Impl {
 protected:
  /// <summary>
  /// Converts from float to uint16_t float16 representation
  /// </summary>
  /// <param name="v">single-precision value to convert</param>
  /// <returns>float16 bit pattern</returns>
  constexpr static uint16_t ToUint16Impl(float v) noexcept;

  /// <summary>
  /// Converts float16 to float
  /// </summary>
  /// <returns>float representation of float16 value</returns>
  float ToFloatImpl() const noexcept;

  /// <summary>
  /// Computes the bit pattern of the absolute value (sign bit cleared).
  /// </summary>
  /// <returns>Bits of the absolute value</returns>
  uint16_t AbsImpl() const noexcept {
    return static_cast<uint16_t>(val & ~kSignMask);
  }

  /// <summary>
  /// Computes the bit pattern with the sign flipped. A NaN is returned unchanged.
  /// </summary>
  /// <returns>Bits of the negated value</returns>
  uint16_t NegateImpl() const noexcept {
    return IsNaN() ? val : static_cast<uint16_t>(val ^ kSignMask);
  }

 public:
  // uint16_t special values
  static constexpr uint16_t kSignMask = 0x8000U;
  static constexpr uint16_t kBiasedExponentMask = 0x7C00U;
  static constexpr uint16_t kPositiveInfinityBits = 0x7C00U;
  static constexpr uint16_t kNegativeInfinityBits = 0xFC00U;
  static constexpr uint16_t kPositiveQNaNBits = 0x7E00U;
  static constexpr uint16_t kNegativeQNaNBits = 0xFE00U;
  // NOTE(review): with the layout implied by kOneBits (0x3C00 == 1.0), 0x4170 decodes to
  // roughly 2.71875 rather than a machine epsilon. Value left untouched because callers
  // may depend on it - confirm the intended constant.
  static constexpr uint16_t kEpsilonBits = 0x4170U;
  static constexpr uint16_t kMinValueBits = 0xFBFFU;  // Lowest (most negative) finite value
  static constexpr uint16_t kMaxValueBits = 0x7BFFU;  // Largest finite value
  static constexpr uint16_t kOneBits = 0x3C00U;
  static constexpr uint16_t kMinusOneBits = 0xBC00U;

  /// <summary>Raw float16 bit pattern; zero-initialized (positive zero).</summary>
  uint16_t val{0};

  Float16Impl() = default;

  /// <summary>
  /// Checks if the value is negative (sign bit set; this includes negative zero
  /// and NaNs with the sign bit set).
  /// </summary>
  /// <returns>true if negative</returns>
  bool IsNegative() const noexcept {
    return (val & kSignMask) != 0;
  }

  /// <summary>
  /// Tests if the value is NaN
  /// </summary>
  /// <returns>true if NaN</returns>
  bool IsNaN() const noexcept {
    return AbsImpl() > kPositiveInfinityBits;
  }

  /// <summary>
  /// Tests if the value is finite
  /// </summary>
  /// <returns>true if finite</returns>
  bool IsFinite() const noexcept {
    return AbsImpl() < kPositiveInfinityBits;
  }

  /// <summary>
  /// Tests if the value represents positive infinity.
  /// </summary>
  /// <returns>true if positive infinity</returns>
  bool IsPositiveInfinity() const noexcept {
    return val == kPositiveInfinityBits;
  }

  /// <summary>
  /// Tests if the value represents negative infinity
  /// </summary>
  /// <returns>true if negative infinity</returns>
  bool IsNegativeInfinity() const noexcept {
    return val == kNegativeInfinityBits;
  }

  /// <summary>
  /// Tests if the value is either positive or negative infinity.
  /// </summary>
  /// <returns>True if absolute value is infinity</returns>
  bool IsInfinity() const noexcept {
    return AbsImpl() == kPositiveInfinityBits;
  }

  /// <summary>
  /// Tests if the value is NaN or zero. Useful for comparisons.
  /// </summary>
  /// <returns>True if NaN or zero.</returns>
  bool IsNaNOrZero() const noexcept {
    auto abs = AbsImpl();
    return (abs == 0 || abs > kPositiveInfinityBits);
  }

  /// <summary>
  /// Tests if the value is normal (not zero, subnormal, infinite, or NaN).
  /// </summary>
  /// <returns>True if so</returns>
  bool IsNormal() const noexcept {
    auto abs = AbsImpl();
    return (abs < kPositiveInfinityBits)           // is finite
           && (abs != 0)                           // is not zero
           && ((abs & kBiasedExponentMask) != 0);  // is not subnormal (has a non-zero exponent)
  }

  /// <summary>
  /// Tests if the value is subnormal (denormal).
  /// </summary>
  /// <returns>True if so</returns>
  bool IsSubnormal() const noexcept {
    auto abs = AbsImpl();
    return (abs < kPositiveInfinityBits)           // is finite
           && (abs != 0)                           // is not zero
           && ((abs & kBiasedExponentMask) == 0);  // is subnormal (has a zero exponent)
  }

  /// <summary>
  /// Creates an instance that represents absolute value.
  /// </summary>
  /// <returns>Absolute value</returns>
  Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); }

  /// <summary>
  /// Creates a new instance with the sign flipped.
  /// </summary>
  /// <returns>Flipped sign instance</returns>
  Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); }

  /// <summary>
  /// IEEE defines that positive and negative zero are equal, this gives us a quick equality check
  /// for two values by or'ing the private bits together and stripping the sign. They are both zero,
  /// and therefore equivalent, if the resulting value is still zero.
  /// </summary>
  /// <param name="lhs">first value</param>
  /// <param name="rhs">second value</param>
  /// <returns>True if both arguments represent zero</returns>
  static bool AreZero(const Float16Impl& lhs, const Float16Impl& rhs) noexcept {
    return static_cast<uint16_t>((lhs.val | rhs.val) & ~kSignMask) == 0;
  }

  /// <summary>
  /// Value equality: NaN never compares equal (even to itself), and positive
  /// and negative zero compare equal although their bit patterns differ.
  /// </summary>
  /// <param name="rhs">value to compare against</param>
  /// <returns>True if both operands represent the same number</returns>
  bool operator==(const Float16Impl& rhs) const noexcept {
    if (IsNaN() || rhs.IsNaN()) {
      // IEEE defines that NaN is not equal to anything, including itself.
      return false;
    }
    // Identical bits are equal; +0 and -0 differ only in the sign bit and must
    // still compare equal, which keeps == consistent with operator< below.
    return val == rhs.val || AreZero(*this, rhs);
  }

  /// <summary>Logical negation of operator==; true whenever either operand is NaN.</summary>
  bool operator!=(const Float16Impl& rhs) const noexcept { return !(*this == rhs); }

  /// <summary>
  /// Strict ordering by numeric value. Any comparison involving NaN is false,
  /// and the two zeros are not ordered relative to each other.
  /// </summary>
  /// <param name="rhs">value to compare against</param>
  /// <returns>True if this value is strictly less than rhs</returns>
  bool operator<(const Float16Impl& rhs) const noexcept {
    if (IsNaN() || rhs.IsNaN()) {
      // IEEE defines that NaN is unordered with respect to everything, including itself.
      return false;
    }

    const bool left_is_negative = IsNegative();
    if (left_is_negative != rhs.IsNegative()) {
      // When the signs of left and right differ, we know that left is less than right if it is
      // the negative value. The exception to this is if both values are zero, in which case IEEE
      // says they should be equal, even if the signs differ.
      return left_is_negative && !AreZero(*this, rhs);
    }
    // Same sign: the raw bits order like magnitudes, so the integer comparison
    // is inverted when both operands are negative.
    return (val != rhs.val) && ((val < rhs.val) ^ left_is_negative);
  }
};
217 
218 // The following Float16_t conversions are based on the code from
219 // Eigen library.
220 
221 // The conversion routines are Copyright (c) Fabian Giesen, 2016.
222 // The original license follows:
223 //
224 // Copyright (c) Fabian Giesen, 2016
225 // All rights reserved.
226 // Redistribution and use in source and binary forms, with or without
227 // modification, are permitted.
228 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
229 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
230 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
231 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
232 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
233 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
234 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
235 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
236 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
237 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
238 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
239 
namespace detail {
/// <summary>
/// Overlay of a 32-bit unsigned integer and a float sharing the same storage.
/// The Float16 conversion routines write one member and read the other to get
/// at the raw bit pattern of a single-precision value.
/// </summary>
union float32_bits {
  unsigned int u;  // raw bit pattern
  float f;         // the same storage viewed as a float
};
}  // namespace detail
246 
247 template <class Derived>
248 inline constexpr uint16_t Float16Impl<Derived>::ToUint16Impl(float v) noexcept {
250  f.f = v;
251 
252  constexpr detail::float32_bits f32infty = {255 << 23};
253  constexpr detail::float32_bits f16max = {(127 + 16) << 23};
254  constexpr detail::float32_bits denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23};
255  constexpr unsigned int sign_mask = 0x80000000u;
256  uint16_t val = static_cast<uint16_t>(0x0u);
257 
258  unsigned int sign = f.u & sign_mask;
259  f.u ^= sign;
260 
261  // NOTE all the integer compares in this function can be safely
262  // compiled into signed compares since all operands are below
263  // 0x80000000. Important if you want fast straight SSE2 code
264  // (since there's no unsigned PCMPGTD).
265 
266  if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
267  val = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
268  } else { // (De)normalized number or zero
269  if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero
270  // use a magic value to align our 10 mantissa bits at the bottom of
271  // the float. as long as FP addition is round-to-nearest-even this
272  // just works.
273  f.f += denorm_magic.f;
274 
275  // and one integer subtract of the bias later, we have our final float!
276  val = static_cast<uint16_t>(f.u - denorm_magic.u);
277  } else {
278  unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
279 
280  // update exponent, rounding bias part 1
281  // Equivalent to `f.u += ((unsigned int)(15 - 127) << 23) + 0xfff`, but
282  // without arithmetic overflow.
283  f.u += 0xc8000fffU;
284  // rounding bias part 2
285  f.u += mant_odd;
286  // take the bits!
287  val = static_cast<uint16_t>(f.u >> 13);
288  }
289  }
290 
291  val |= static_cast<uint16_t>(sign >> 16);
292  return val;
293 }
294 
295 template <class Derived>
296 inline float Float16Impl<Derived>::ToFloatImpl() const noexcept {
297  constexpr detail::float32_bits magic = {113 << 23};
298  constexpr unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
300 
301  o.u = (val & 0x7fff) << 13; // exponent/mantissa bits
302  unsigned int exp = shifted_exp & o.u; // just the exponent
303  o.u += (127 - 15) << 23; // exponent adjust
304 
305  // handle exponent special cases
306  if (exp == shifted_exp) { // Inf/NaN?
307  o.u += (128 - 16) << 23; // extra exp adjust
308  } else if (exp == 0) { // Zero/Denormal?
309  o.u += 1 << 23; // extra exp adjust
310  o.f -= magic.f; // re-normalize
311  }
312 
313  // Attempt to workaround the Internal Compiler Error on ARM64
314  // for bitwise | operator, including std::bitset
315 #if (defined _MSC_VER) && (defined _M_ARM || defined _M_ARM64 || defined _M_ARM64EC)
316  if (IsNegative()) {
317  return -o.f;
318  }
319 #else
320  // original code:
321  o.u |= (val & 0x8000U) << 16U; // sign bit
322 #endif
323  return o.f;
324 }
325 
/// <summary>
/// CRTP base shared by the public and internal bfloat16 classes.
/// Keeps the raw 16-bit pattern in <c>val</c> and answers classification
/// queries directly on those bits. Abs() and Negate() call
/// <c>Derived::FromBits(uint16_t)</c>, so the derived type must supply it.
/// </summary>
template <class Derived>
struct BFloat16Impl {
 protected:
  /// <summary>
  /// Converts from float to the uint16_t bfloat16 representation
  /// </summary>
  /// <param name="v">single-precision value to convert</param>
  /// <returns>bfloat16 bit pattern</returns>
  static uint16_t ToUint16Impl(float v) noexcept;

  /// <summary>
  /// Converts bfloat16 to float
  /// </summary>
  /// <returns>float representation of bfloat16 value</returns>
  float ToFloatImpl() const noexcept;

  /// <summary>
  /// Bit pattern of the magnitude: the stored bits with the sign bit cleared.
  /// </summary>
  /// <returns>Bits of the absolute value</returns>
  uint16_t AbsImpl() const noexcept {
    const uint16_t magnitude_mask = static_cast<uint16_t>(~kSignMask);
    return static_cast<uint16_t>(val & magnitude_mask);
  }

  /// <summary>
  /// Bit pattern with the sign bit toggled; a NaN is handed back untouched.
  /// </summary>
  /// <returns>Bits of the negated value</returns>
  uint16_t NegateImpl() const noexcept {
    if (IsNaN()) {
      return val;
    }
    return static_cast<uint16_t>(val ^ kSignMask);
  }

 public:
  // uint16_t special values
  static constexpr uint16_t kSignMask = 0x8000U;
  static constexpr uint16_t kBiasedExponentMask = 0x7F80U;
  static constexpr uint16_t kPositiveInfinityBits = 0x7F80U;
  static constexpr uint16_t kNegativeInfinityBits = 0xFF80U;
  static constexpr uint16_t kPositiveQNaNBits = 0x7FC1U;
  static constexpr uint16_t kNegativeQNaNBits = 0xFFC1U;
  // NOTE(review): identical to kPositiveInfinityBits, so IsNaN() is false for this
  // pattern - confirm whether a real signaling-NaN encoding was intended.
  static constexpr uint16_t kSignaling_NaNBits = 0x7F80U;
  // NOTE(review): 0x0080 has biased exponent 1 and a zero mantissa, i.e. the smallest
  // positive normal value rather than the gap after 1.0 - confirm intended meaning.
  static constexpr uint16_t kEpsilonBits = 0x0080U;
  static constexpr uint16_t kMinValueBits = 0xFF7FU;
  static constexpr uint16_t kMaxValueBits = 0x7F7FU;
  static constexpr uint16_t kRoundToNearest = 0x7FFFU;
  static constexpr uint16_t kOneBits = 0x3F80U;
  static constexpr uint16_t kMinusOneBits = 0xBF80U;

  /// <summary>Raw bfloat16 bit pattern; zero-initialized (positive zero).</summary>
  uint16_t val{0};

  BFloat16Impl() = default;

  /// <summary>
  /// Reports whether the sign bit is set.
  /// </summary>
  /// <returns>true if negative</returns>
  bool IsNegative() const noexcept {
    return (val & kSignMask) != 0;
  }

  /// <summary>
  /// Reports whether the magnitude bits lie above the infinity pattern.
  /// </summary>
  /// <returns>true if NaN</returns>
  bool IsNaN() const noexcept {
    return kPositiveInfinityBits < AbsImpl();
  }

  /// <summary>
  /// Reports whether the magnitude bits lie below the infinity pattern.
  /// </summary>
  /// <returns>true if finite</returns>
  bool IsFinite() const noexcept {
    return kPositiveInfinityBits > AbsImpl();
  }

  /// <summary>
  /// Exact match against the positive-infinity pattern.
  /// </summary>
  /// <returns>true if positive infinity</returns>
  bool IsPositiveInfinity() const noexcept {
    return kPositiveInfinityBits == val;
  }

  /// <summary>
  /// Exact match against the negative-infinity pattern.
  /// </summary>
  /// <returns>true if negative infinity</returns>
  bool IsNegativeInfinity() const noexcept {
    return kNegativeInfinityBits == val;
  }

  /// <summary>
  /// Matches infinity of either sign.
  /// </summary>
  /// <returns>True if absolute value is infinity</returns>
  bool IsInfinity() const noexcept {
    return kPositiveInfinityBits == AbsImpl();
  }

  /// <summary>
  /// True for either zero and for any NaN. Useful for comparisons.
  /// </summary>
  /// <returns>True if NaN or zero.</returns>
  bool IsNaNOrZero() const noexcept {
    const uint16_t magnitude = AbsImpl();
    if (magnitude == 0) {
      return true;
    }
    return magnitude > kPositiveInfinityBits;
  }

  /// <summary>
  /// True for normal numbers only (not zero, subnormal, infinite, or NaN).
  /// </summary>
  /// <returns>True if so</returns>
  bool IsNormal() const noexcept {
    // A normal number has an exponent field that is neither all zeros
    // (zero / subnormal) nor all ones (infinity / NaN).
    const uint16_t exponent = static_cast<uint16_t>(AbsImpl() & kBiasedExponentMask);
    return exponent != 0 && exponent != kBiasedExponentMask;
  }

  /// <summary>
  /// True for subnormal (denormal) numbers only.
  /// </summary>
  /// <returns>True if so</returns>
  bool IsSubnormal() const noexcept {
    // Non-zero magnitude with an all-zero exponent field; such a value is
    // necessarily finite.
    const uint16_t magnitude = AbsImpl();
    return magnitude != 0 && (magnitude & kBiasedExponentMask) == 0;
  }

  /// <summary>
  /// Builds the derived-type value holding the magnitude of this one.
  /// </summary>
  /// <returns>Absolute value</returns>
  Derived Abs() const noexcept {
    return Derived::FromBits(AbsImpl());
  }

  /// <summary>
  /// Builds the derived-type value with the opposite sign (NaN is left as is).
  /// </summary>
  /// <returns>Flipped sign instance</returns>
  Derived Negate() const noexcept {
    return Derived::FromBits(NegateImpl());
  }

  /// <summary>
  /// IEEE treats positive and negative zero as equal. Two values are both zero
  /// exactly when neither has any bit set apart from the sign bit.
  /// </summary>
  /// <param name="lhs">first value</param>
  /// <param name="rhs">second value</param>
  /// <returns>True if both arguments represent zero</returns>
  static bool AreZero(const BFloat16Impl& lhs, const BFloat16Impl& rhs) noexcept {
    return lhs.AbsImpl() == 0 && rhs.AbsImpl() == 0;
  }
};
485 
486 template <class Derived>
487 inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept {
488  uint16_t result;
489  if (std::isnan(v)) {
490  result = kPositiveQNaNBits;
491  } else {
492  auto get_msb_half = [](float fl) {
493  uint16_t result;
494 #ifdef __cpp_if_constexpr
495  if constexpr (detail::endian::native == detail::endian::little) {
496 #else
497  if (detail::endian::native == detail::endian::little) {
498 #endif
499  std::memcpy(&result, reinterpret_cast<char*>(&fl) + sizeof(uint16_t), sizeof(uint16_t));
500  } else {
501  std::memcpy(&result, &fl, sizeof(uint16_t));
502  }
503  return result;
504  };
505 
506  uint16_t upper_bits = get_msb_half(v);
507  union {
508  uint32_t U32;
509  float F32;
510  };
511  F32 = v;
512  U32 += (upper_bits & 1) + kRoundToNearest;
513  result = get_msb_half(F32);
514  }
515  return result;
516 }
517 
518 template <class Derived>
519 inline float BFloat16Impl<Derived>::ToFloatImpl() const noexcept {
520  if (IsNaN()) {
521  return std::numeric_limits<float>::quiet_NaN();
522  }
523  float result;
524  char* const first = reinterpret_cast<char*>(&result);
525  char* const second = first + sizeof(uint16_t);
526 #ifdef __cpp_if_constexpr
527  if constexpr (detail::endian::native == detail::endian::little) {
528 #else
529  if (detail::endian::native == detail::endian::little) {
530 #endif
531  std::memset(first, 0, sizeof(uint16_t));
532  std::memcpy(second, &val, sizeof(uint16_t));
533  } else {
534  std::memcpy(first, &val, sizeof(uint16_t));
535  std::memset(second, 0, sizeof(uint16_t));
536  }
537  return result;
538 }
539 
540 } // namespace onnxruntime_float16
GLint first
Definition: glcorearb.h:405
static constexpr uint16_t kOneBits
bool operator<(const Float16Impl &rhs) const noexcept
bool IsNaN() const noexcept
Tests if the value is NaN
Derived Abs() const noexcept
Creates an instance that represents absolute value.
Derived Negate() const noexcept
Creates a new instance with the sign flipped.
static constexpr uint16_t kOneBits
const GLdouble * v
Definition: glcorearb.h:837
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
static constexpr uint16_t kPositiveQNaNBits
static constexpr uint16_t kSignaling_NaNBits
bool operator!=(const Float16Impl &rhs) const noexcept
static constexpr uint16_t kNegativeInfinityBits
bool operator==(const Float16Impl &rhs) const noexcept
float ToFloatImpl() const noexcept
Converts bfloat16 to float
static bool AreZero(const Float16Impl &lhs, const Float16Impl &rhs) noexcept
IEEE defines that positive and negative zero are equal, this gives us a quick equality check for two ...
**But if you need a result
Definition: thread.h:613
bool IsNegative() const noexcept
Checks if the value is negative
static constexpr uint16_t kMaxValueBits
uint16_t NegateImpl() const noexcept
Creates a new instance with the sign flipped.
static constexpr uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation
bool IsFinite() const noexcept
Tests if the value is finite
static constexpr uint16_t kBiasedExponentMask
static constexpr uint16_t kMinValueBits
static constexpr uint16_t kNegativeQNaNBits
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
GLfloat f
Definition: glcorearb.h:1926
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
bool IsFinite() const noexcept
Tests if the value is finite
Shared implementation between public and internal classes. CRTP pattern.
bool IsNegative() const noexcept
Checks if the value is negative
static constexpr uint16_t kBiasedExponentMask
static constexpr uint16_t kNegativeInfinityBits
static constexpr uint16_t kMinusOneBits
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
float ToFloatImpl() const noexcept
Converts float16 to float
static constexpr uint16_t kEpsilonBits
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr uint16_t kPositiveQNaNBits
static constexpr uint16_t kMinusOneBits
IMATH_HOSTDEVICE constexpr int sign(T a) IMATH_NOEXCEPT
Definition: ImathFun.h:33
static constexpr uint16_t kSignMask
static constexpr uint16_t kMinValueBits
Derived Negate() const noexcept
Creates a new instance with the sign flipped.
uint16_t AbsImpl() const noexcept
Creates an instance that represents absolute value.
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
static constexpr uint16_t kNegativeQNaNBits
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Derived Abs() const noexcept
Creates an instance that represents absolute value.
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
static constexpr uint16_t kPositiveInfinityBits
static constexpr uint16_t kMaxValueBits
static constexpr uint16_t kPositiveInfinityBits
bool IsNaN() const noexcept
Tests if the value is NaN
GLuint GLfloat * val
Definition: glcorearb.h:1608
static constexpr uint16_t kRoundToNearest
static constexpr uint16_t kSignMask
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
uint16_t AbsImpl() const noexcept
Creates an instance that represents absolute value.
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
static bool AreZero(const BFloat16Impl &lhs, const BFloat16Impl &rhs) noexcept
IEEE defines that positive and negative zero are equal, this gives us a quick equality check for two ...
IMATH_INTERNAL_NAMESPACE_HEADER_ENTER IMATH_HOSTDEVICE constexpr T abs(T a) IMATH_NOEXCEPT
Definition: ImathFun.h:26
uint16_t NegateImpl() const noexcept
Creates a new instance with the sign flipped.
static constexpr uint16_t kEpsilonBits
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Shared implementation between public and internal classes. CRTP pattern.
static uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity