HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
float16.h
Go to the documentation of this file.
1 // Copyright (c) Microsoft Corporation. All rights reserved.
2 // Licensed under the MIT License.
3 #pragma once
4 
5 #include <math.h>
6 
7 #include "endian.h"
8 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
9 #include "cuda_bf16.h"
10 #endif
11 
12 #if !defined(__CUDACC__) && !defined(__HIPCC__)
13 #include "core/common/narrow.h"
14 #endif
15 
16 #include "core/common/common.h"
17 
19 
20 namespace onnxruntime {
21 
22 #if defined(__CUDACC__) || defined(__HIPCC__)
23 #define ORT_HOST_DEVICE __host__ __device__
24 #else
25 #define ORT_HOST_DEVICE
26 #endif
27 
28 // MLFloat16
30  private:
31  explicit constexpr MLFloat16(uint16_t x) noexcept { val = x; }
32 
33  public:
35 
36  MLFloat16() = default;
37 
38  constexpr static MLFloat16 FromBits(uint16_t x) noexcept { return MLFloat16(x); }
39 
40  // Using inherited implementation instead of math floatToHalf allows us to use this
41  // in other shared providers without having to implement the bridge
42  explicit MLFloat16(float v) noexcept { val = Base::ToUint16Impl(v); }
43 
44  static const MLFloat16 NaN;
45  static const MLFloat16 NegativeNaN;
46  static const MLFloat16 Infinity;
48  static const MLFloat16 Epsilon;
49  static const MLFloat16 MinValue;
50  static const MLFloat16 MaxValue;
51  static const MLFloat16 Zero;
52  static const MLFloat16 One;
53  static const MLFloat16 MinusOne;
54 
55  // Using inherited implementation instead of math halfToFloat allows us to use this
56  // in other shared providers without having to implement the bridge
57  float ToFloat() const noexcept { return Base::ToFloatImpl(); }
58 
59  using Base::IsNegative;
60 
61  using Base::IsNaN;
62 
63  using Base::IsFinite;
64 
66 
68 
69  using Base::IsInfinity;
70 
71  using Base::IsNaNOrZero;
72 
73  using Base::IsNormal;
74 
75  using Base::IsSubnormal;
76 
77  using Base::Abs;
78 
79  using Base::Negate;
80 
81  operator float() const noexcept { return ToFloat(); }
82 
83  using Base::operator==;
84  using Base::operator!=;
85  using Base::operator<;
86 };
87 
88 // BFloat16
91 
92 #if defined(__HIP__)
93  ORT_HOST_DEVICE BFloat16() = default;
94 #else
95  BFloat16() = default;
96 #endif
97 
98  struct FromBitsT {};
99  static constexpr ORT_HOST_DEVICE FromBitsT FromBits() noexcept { return FromBitsT(); }
100  constexpr ORT_HOST_DEVICE BFloat16(unsigned short bits, FromBitsT) noexcept { val = bits; }
101 
102  static constexpr ORT_HOST_DEVICE BFloat16 FromBits(uint16_t bits) noexcept {
103  return BFloat16(bits, FromBits());
104  }
105 
106  inline ORT_HOST_DEVICE BFloat16(float v) noexcept {
107 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
108  val = __bfloat16_as_ushort(__float2bfloat16(v));
109 #elif defined(__HIP__)
110  // We should be using memcpy in order to respect the strict aliasing rule but it fails in the HIP environment.
111  if (v != v) { // isnan
112  val = UINT16_C(0x7FC0);
113  } else {
114  union {
115  uint32_t U32;
116  float F32;
117  };
118 
119  F32 = v;
120  uint32_t rounding_bias = ((U32 >> 16) & 1) + UINT32_C(0x7FFF);
121  val = static_cast<uint16_t>((U32 + rounding_bias) >> 16);
122  }
123 #else
124 
125  // Use C isnan to work both in host and device
126  if (::isnan(v)) {
128  } else {
129  auto get_msb_half = [](float fl) {
130  uint16_t result;
131  if constexpr (onnxruntime_float16::detail::endian::native == onnxruntime_float16::detail::endian::little) {
132  std::memcpy(&result, reinterpret_cast<char*>(&fl) + sizeof(uint16_t), sizeof(uint16_t));
133  } else {
134  std::memcpy(&result, &fl, sizeof(uint16_t));
135  }
136  return result;
137  };
138 
139  uint16_t upper_bits = get_msb_half(v);
140  union {
141  uint32_t U32;
142  float F32;
143  };
144  F32 = v;
145  U32 += (upper_bits & 1) + kRoundToNearest;
146  val = get_msb_half(F32);
147  }
148 #endif
149  }
150 
151  inline ORT_HOST_DEVICE float ToFloat() const noexcept {
152 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
153  return __bfloat162float(*reinterpret_cast<const __nv_bfloat16*>(&val));
154 #elif defined(__HIP__)
155  // We should be using memcpy in order to respect the strict aliasing rule but it fails in the HIP environment.
156  float result = 0;
157  uint32_t tmp = val;
158  tmp <<= 16;
159  float* tempRes = reinterpret_cast<float*>(&tmp);
160  result = *tempRes;
161  return result;
162 #else
163 
164  if (IsNaNHostDevice()) {
165  return std::numeric_limits<float>::quiet_NaN();
166  }
167 
168  float result = 0;
169  char* const first = reinterpret_cast<char*>(&result);
170  if constexpr (endian::native == endian::little) {
171  char* const second = first + sizeof(uint16_t);
172  std::memcpy(second, &val, sizeof(uint16_t));
173  } else {
174  std::memcpy(first, &val, sizeof(uint16_t));
175  }
176  return result;
177 #endif
178  }
179 
180  static const BFloat16 NaN;
181  static const BFloat16 NegativeNaN;
182  static const BFloat16 Infinity;
184  static const BFloat16 Epsilon;
185  static const BFloat16 MinValue;
186  static const BFloat16 MaxValue;
187  static const BFloat16 Zero;
188  static const BFloat16 One;
189  static const BFloat16 MinusOne;
190 
191  using Base::IsNegative;
192 
193  using Base::IsNaN;
194 
195  using Base::IsFinite;
196 
198 
200 
201  using Base::IsInfinity;
202 
203  using Base::IsNaNOrZero;
204 
205  using Base::IsNormal;
206 
207  using Base::IsSubnormal;
208 
209  using Base::Abs;
210 
211  using Base::Negate;
212 
213  ORT_HOST_DEVICE operator float() const noexcept { return ToFloat(); }
214 
215 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
216  ORT_HOST_DEVICE BFloat16(const __nv_bfloat16& value) { val = *reinterpret_cast<const unsigned short*>(&value); }
217  explicit ORT_HOST_DEVICE operator __nv_bfloat16() const { return *reinterpret_cast<const __nv_bfloat16*>(&val); }
218 #endif
219 
220  ORT_HOST_DEVICE bool operator==(const BFloat16& rhs) const noexcept {
221  if (IsNaNHostDevice() || rhs.IsNaNHostDevice()) {
222  // IEEE defines that NaN is not equal to anything, including itself.
223  return false;
224  }
225  return val == rhs.val;
226  }
227 
228  ORT_HOST_DEVICE bool operator!=(const BFloat16& rhs) const noexcept {
229  return !(*this == rhs);
230  }
231 
232  ORT_HOST_DEVICE bool operator<(const BFloat16& rhs) const noexcept {
233  if (IsNaNHostDevice() || rhs.IsNaNHostDevice()) {
234  // IEEE defines that NaN is unordered with respect to everything, including itself.
235  return false;
236  }
237 
238  const bool left_is_negative = IsNegativeHostDevice();
239  if (left_is_negative != rhs.IsNegativeHostDevice()) {
240  // When the signs of left and right differ, we know that left is less than right if it is
241  // the negative value. The exception to this is if both values are zero, in which case IEEE
242  // says they should be equal, even if the signs differ.
243  return left_is_negative && !AreZeroHostDevice(*this, rhs);
244  }
245  return (val != rhs.val) && ((val < rhs.val) ^ left_is_negative);
246  }
247 
248  ORT_HOST_DEVICE bool IsNegativeHostDevice() const noexcept {
249  return (val & kSignMask) != 0;
250  }
251 
252  ORT_HOST_DEVICE bool IsNaNHostDevice() const noexcept {
253  return static_cast<uint16_t>(val & ~kSignMask) > kPositiveInfinityBits;
254  }
255 
256  ORT_HOST_DEVICE static bool AreZeroHostDevice(const BFloat16Impl& lhs, const BFloat16Impl& rhs) noexcept {
257  // IEEE defines that positive and negative zero are equal, this gives us a quick equality check
258  // for two values by or'ing the private bits together and stripping the sign. They are both zero,
259  // and therefore equivalent, if the resulting value is still zero.
260  return static_cast<uint16_t>((lhs.val | rhs.val) & ~kSignMask) == 0;
261  }
262 };
263 
264 // User defined suffixes to make it easier to declare
265 // initializers with MLFloat16 and BFloat16 from unsigned short
266 // E.g 10_f16 or 10_b16
267 #if !defined(__CUDACC__) && !defined(__HIPCC__)
268 inline MLFloat16 operator"" _f16(unsigned long long int v) noexcept {
269  return MLFloat16::FromBits(narrow<uint16_t>(v));
270 }
271 
272 inline MLFloat16 operator"" _fp16(long double v) noexcept {
273  return MLFloat16(static_cast<float>(v));
274 }
275 
276 inline BFloat16 operator"" _b16(unsigned long long int v) noexcept {
277  return BFloat16::FromBits((narrow<uint16_t>(v)));
278 }
279 
280 inline BFloat16 operator"" _bfp16(long double v) noexcept {
281  return BFloat16(static_cast<float>(v));
282 }
283 #endif
284 
285 inline void BFloat16ToFloat(const BFloat16* blf, float* flt, size_t size) noexcept {
286  auto src = blf;
287  auto d = flt;
288  for (; size != 0; ++src, ++d, --size) {
289  *d = src->ToFloat();
290  }
291 }
292 
293 inline void FloatToBFloat16(const float* flt, BFloat16* blf, size_t size) {
294  auto src = flt;
295  auto d = blf;
296  for (; size != 0; ++src, ++d, --size) {
297  *d = BFloat16(*src);
298  }
299 }
300 
301 } // namespace onnxruntime
GLint first
Definition: glcorearb.h:405
bool IsNaN() const noexcept
Tests if the value is NaN
MLFloat16 Abs() const noexcept
Creates an instance that represents absolute value.
ORT_HOST_DEVICE bool operator<(const BFloat16 &rhs) const noexcept
Definition: float16.h:232
BFloat16 Negate() const noexcept
Creates a new instance with the sign flipped.
static const MLFloat16 NegativeInfinity
Definition: float16.h:47
const GLdouble * v
Definition: glcorearb.h:837
MLFloat16(float v) noexcept
Definition: float16.h:42
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
GLsizei const GLfloat * value
Definition: glcorearb.h:824
void BFloat16ToFloat(const BFloat16 *blf, float *flt, size_t size) noexcept
Definition: float16.h:285
static const BFloat16 MinusOne
Definition: float16.h:189
static const BFloat16 NaN
Definition: float16.h:180
**But if you need a result
Definition: thread.h:613
void FloatToBFloat16(const float *flt, BFloat16 *blf, size_t size)
Definition: float16.h:293
bool IsNegative() const noexcept
Checks if the value is negative
static constexpr MLFloat16 FromBits(uint16_t x) noexcept
Definition: float16.h:38
ORT_HOST_DEVICE BFloat16(float v) noexcept
Definition: float16.h:106
ORT_HOST_DEVICE bool IsNegativeHostDevice() const noexcept
Definition: float16.h:248
static constexpr uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation
bool IsFinite() const noexcept
Tests if the value is finite
ORT_HOST_DEVICE bool operator!=(const BFloat16 &rhs) const noexcept
Definition: float16.h:228
static ORT_HOST_DEVICE bool AreZeroHostDevice(const BFloat16Impl &lhs, const BFloat16Impl &rhs) noexcept
Definition: float16.h:256
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr ORT_HOST_DEVICE FromBitsT FromBits() noexcept
Definition: float16.h:99
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
bool IsFinite() const noexcept
Tests if the value is finite
IMATH_NAMESPACE::V2f float
static const MLFloat16 Epsilon
Definition: float16.h:48
Shared implementation between public and internal classes. CRTP pattern.
#define ORT_HOST_DEVICE
Definition: float16.h:25
bool IsNegative() const noexcept
Checks if the value is negative
ORT_HOST_DEVICE bool IsNaNHostDevice() const noexcept
Definition: float16.h:252
ORT_HOST_DEVICE float ToFloat() const noexcept
Definition: float16.h:151
ORT_HOST_DEVICE bool operator==(const BFloat16 &rhs) const noexcept
Definition: float16.h:220
constexpr ORT_HOST_DEVICE BFloat16(unsigned short bits, FromBitsT) noexcept
Definition: float16.h:100
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
float ToFloatImpl() const noexcept
Converts float16 to float
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static const BFloat16 Infinity
Definition: float16.h:182
MLFloat16 Negate() const noexcept
Creates a new instance with the sign flipped.
GLint GLenum GLint x
Definition: glcorearb.h:409
static const BFloat16 MaxValue
Definition: float16.h:186
static const BFloat16 NegativeNaN
Definition: float16.h:181
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
static const MLFloat16 Infinity
Definition: float16.h:46
static const MLFloat16 NegativeNaN
Definition: float16.h:45
BFloat16 Abs() const noexcept
Creates an instance that represents absolute value.
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
GLsizeiptr size
Definition: glcorearb.h:664
static const MLFloat16 Zero
Definition: float16.h:51
bool IsNaN() const noexcept
Tests if the value is NaN
static const BFloat16 One
Definition: float16.h:188
static const MLFloat16 MinusOne
Definition: float16.h:53
static const BFloat16 NegativeInfinity
Definition: float16.h:183
GLuint GLfloat * val
Definition: glcorearb.h:1608
static const MLFloat16 One
Definition: float16.h:52
static const MLFloat16 MaxValue
Definition: float16.h:50
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
Definition: core.h:1131
float ToFloat() const noexcept
Definition: float16.h:57
static const BFloat16 Epsilon
Definition: float16.h:184
static constexpr ORT_HOST_DEVICE BFloat16 FromBits(uint16_t bits) noexcept
Definition: float16.h:102
static const MLFloat16 NaN
Definition: float16.h:44
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Shared implementation between public and internal classes. CRTP pattern.
static const MLFloat16 MinValue
Definition: float16.h:49
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
static const BFloat16 MinValue
Definition: float16.h:185
static const BFloat16 Zero
Definition: float16.h:187
GLenum src
Definition: glcorearb.h:1793