8 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
12 #if !defined(__CUDACC__) && !defined(__HIPCC__)
20 namespace onnxruntime {
22 #if defined(__CUDACC__) || defined(__HIPCC__)
23 #define ORT_HOST_DEVICE __host__ __device__
25 #define ORT_HOST_DEVICE
83 using Base::operator==;
84 using Base::operator!=;
85 using Base::operator<;
107 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
108 val = __bfloat16_as_ushort(__float2bfloat16(
v));
109 #elif defined(__HIP__)
112 val = UINT16_C(0x7FC0);
120 uint32_t rounding_bias = ((U32 >> 16) & 1) + UINT32_C(0x7FFF);
121 val =
static_cast<uint16_t
>((U32 + rounding_bias) >> 16);
129 auto get_msb_half = [](
float fl) {
131 if constexpr (onnxruntime_float16::detail::endian::native == onnxruntime_float16::detail::endian::little) {
132 std::memcpy(&result, reinterpret_cast<char*>(&fl) +
sizeof(uint16_t),
sizeof(uint16_t));
134 std::memcpy(&result, &fl,
sizeof(uint16_t));
139 uint16_t upper_bits = get_msb_half(
v);
146 val = get_msb_half(F32);
152 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
153 return __bfloat162float(*reinterpret_cast<const __nv_bfloat16*>(&
val));
154 #elif defined(__HIP__)
159 float* tempRes =
reinterpret_cast<float*
>(&tmp);
165 return std::numeric_limits<float>::quiet_NaN();
169 char*
const first =
reinterpret_cast<char*
>(&
result);
171 char*
const second = first +
sizeof(uint16_t);
172 std::memcpy(second, &
val,
sizeof(uint16_t));
174 std::memcpy(first, &
val,
sizeof(uint16_t));
215 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
217 explicit ORT_HOST_DEVICE operator __nv_bfloat16()
const {
return *
reinterpret_cast<const __nv_bfloat16*
>(&
val); }
225 return val == rhs.val;
229 return !(*
this == rhs);
239 if (left_is_negative != rhs.IsNegativeHostDevice()) {
245 return (
val != rhs.val) && ((
val < rhs.val) ^ left_is_negative);
260 return static_cast<uint16_t
>((lhs.val | rhs.val) & ~
kSignMask) == 0;
267 #if !defined(__CUDACC__) && !defined(__HIPCC__)
268 inline MLFloat16 operator"" _f16(
unsigned long long int v) noexcept {
276 inline BFloat16 operator"" _b16(
unsigned long long int v) noexcept {
280 inline BFloat16 operator"" _bfp16(
long double v) noexcept {
296 for (; size != 0; ++
src, ++d, --
size) {
bool IsNaN() const noexcept
Tests if the value is NaN
MLFloat16 Abs() const noexcept
Creates an instance that represents absolute value.
ORT_HOST_DEVICE bool operator<(const BFloat16 &rhs) const noexcept
BFloat16 Negate() const noexcept
Creates a new instance with the sign flipped.
static const MLFloat16 NegativeInfinity
MLFloat16(float v) noexcept
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
GLsizei const GLfloat * value
void BFloat16ToFloat(const BFloat16 *blf, float *flt, size_t size) noexcept
static const BFloat16 MinusOne
static const BFloat16 NaN
**But if you need a result
void FloatToBFloat16(const float *flt, BFloat16 *blf, size_t size)
bool IsNegative() const noexcept
Checks if the value is negative
static constexpr MLFloat16 FromBits(uint16_t x) noexcept
ORT_HOST_DEVICE BFloat16(float v) noexcept
ORT_HOST_DEVICE bool IsNegativeHostDevice() const noexcept
static constexpr uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation
bool IsFinite() const noexcept
Tests if the value is finite
ORT_HOST_DEVICE bool operator!=(const BFloat16 &rhs) const noexcept
static ORT_HOST_DEVICE bool AreZeroHostDevice(const BFloat16Impl &lhs, const BFloat16Impl &rhs) noexcept
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr ORT_HOST_DEVICE FromBitsT FromBits() noexcept
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
bool IsFinite() const noexcept
Tests if the value is finite
IMATH_NAMESPACE::V2f float
static const MLFloat16 Epsilon
Shared implementation between public and internal classes. CRTP pattern.
bool IsNegative() const noexcept
Checks if the value is negative
ORT_HOST_DEVICE bool IsNaNHostDevice() const noexcept
ORT_HOST_DEVICE float ToFloat() const noexcept
ORT_HOST_DEVICE bool operator==(const BFloat16 &rhs) const noexcept
constexpr ORT_HOST_DEVICE BFloat16(unsigned short bits, FromBitsT) noexcept
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
float ToFloatImpl() const noexcept
Converts float16 to float
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr uint16_t kPositiveQNaNBits
static const BFloat16 Infinity
MLFloat16 Negate() const noexcept
Creates a new instance with the sign flipped.
static const BFloat16 MaxValue
static const BFloat16 NegativeNaN
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
static const MLFloat16 Infinity
static const MLFloat16 NegativeNaN
BFloat16 Abs() const noexcept
Creates an instance that represents absolute value.
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
static const MLFloat16 Zero
static constexpr uint16_t kPositiveInfinityBits
bool IsNaN() const noexcept
Tests if the value is NaN
static const BFloat16 One
static const MLFloat16 MinusOne
static const BFloat16 NegativeInfinity
static constexpr uint16_t kRoundToNearest
static constexpr uint16_t kSignMask
static const MLFloat16 One
static const MLFloat16 MaxValue
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
float ToFloat() const noexcept
static const BFloat16 Epsilon
static constexpr ORT_HOST_DEVICE BFloat16 FromBits(uint16_t bits) noexcept
static const MLFloat16 NaN
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Shared implementation between public and internal classes. CRTP pattern.
static const MLFloat16 MinValue
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
static const BFloat16 MinValue
static const BFloat16 Zero