docs/hdk/float16_8h_source.html

 // Copyright (c) Microsoft Corporation. All rights reserved.

 // Licensed under the MIT License.

 #pragma once


 #include <math.h>


 #include "endian.h"

 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000

 #include "cuda_bf16.h"

 #endif


 #if !defined(__CUDACC__) && !defined(__HIPCC__)

 #include "core/common/narrow.h"

 #endif


 #include "core/common/common.h"


 #include "core/session/onnxruntime_float16.h"


 namespace onnxruntime {


 // ORT_HOST_DEVICE decorates functions that must be callable from both host and device code.
 // When compiling with a CUDA or HIP compiler (__CUDACC__ / __HIPCC__ defined) it expands to
 // `__host__ __device__`; otherwise it expands to nothing so the same declarations compile
 // as ordinary C++.
 #if defined(__CUDACC__) || defined(__HIPCC__)

 #define ORT_HOST_DEVICE __host__ __device__

 #else

 #define ORT_HOST_DEVICE

 #endif


 // MLFloat16
 //
 // 16-bit float16 value type layered on the shared CRTP implementation
 // onnxruntime_float16::Float16Impl. The raw bits are kept in the inherited `val` member;
 // conversions, classification helpers and comparisons are all taken from the base class.
 struct MLFloat16 : onnxruntime_float16::Float16Impl<MLFloat16> {
   using Base = onnxruntime_float16::Float16Impl<MLFloat16>;

   // Defaulted default constructor.
   MLFloat16() = default;

   // Converts a float to its float16 representation.
   // Using inherited implementation instead of math floatToHalf allows us to use this
   // in other shared providers without having to implement the bridge
   explicit MLFloat16(float v) noexcept {
     val = Base::ToUint16Impl(v);
   }

   // Creates an instance whose representation is exactly the 16 bits in `x`
   // (no numeric conversion is performed).
   constexpr static MLFloat16 FromBits(uint16_t x) noexcept {
     return MLFloat16(x);
   }

   // Frequently used constants. They are only declared here; their definitions
   // are not part of this header section.
   static const MLFloat16 NaN;
   static const MLFloat16 NegativeNaN;
   static const MLFloat16 Infinity;
   static const MLFloat16 NegativeInfinity;
   static const MLFloat16 Epsilon;
   static const MLFloat16 MinValue;
   static const MLFloat16 MaxValue;
   static const MLFloat16 Zero;
   static const MLFloat16 One;
   static const MLFloat16 MinusOne;

   // Converts the stored float16 bits back to float.
   // Using inherited implementation instead of math halfToFloat allows us to use this
   // in other shared providers without having to implement the bridge
   float ToFloat() const noexcept {
     return Base::ToFloatImpl();
   }

   // Implicit conversion to float; same result as ToFloat().
   operator float() const noexcept {
     return Base::ToFloatImpl();
   }

   // Classification helpers re-exported from the base implementation.
   using Base::IsFinite;
   using Base::IsInfinity;
   using Base::IsNaN;
   using Base::IsNaNOrZero;
   using Base::IsNegative;
   using Base::IsNegativeInfinity;
   using Base::IsNormal;
   using Base::IsPositiveInfinity;
   using Base::IsSubnormal;

   // Sign manipulation re-exported from the base implementation.
   using Base::Abs;
   using Base::Negate;

   // Comparison operators re-exported from the base implementation.
   using Base::operator==;
   using Base::operator!=;
   using Base::operator<;

  private:
   // Raw-bits constructor. Kept private so that callers have to spell out
   // FromBits() when they mean "these are bits, not a number".
   explicit constexpr MLFloat16(uint16_t x) noexcept {
     val = x;
   }
 };


 // BFloat16
 //
 // bfloat16 value type layered on the shared CRTP implementation
 // onnxruntime_float16::BFloat16Impl. The raw bits are kept in the inherited `val` member.
 // Unlike MLFloat16, the conversions and comparisons are re-implemented here with
 // ORT_HOST_DEVICE so that they can also be used from CUDA / HIP device code.
 struct BFloat16 : onnxruntime_float16::BFloat16Impl<BFloat16> {
   using Base = onnxruntime_float16::BFloat16Impl<BFloat16>;

   // Defaulted default constructor; under HIP it additionally carries the host/device decoration.
 #if defined(__HIP__)
   ORT_HOST_DEVICE BFloat16() = default;
 #else
   BFloat16() = default;
 #endif

   // Tag type (and its factory) used to select the raw-bits constructor below.
   struct FromBitsT {};
   static constexpr ORT_HOST_DEVICE FromBitsT FromBits() noexcept { return FromBitsT(); }

   // Raw-bits constructor: stores `bits` unchanged, no numeric conversion.
   constexpr ORT_HOST_DEVICE BFloat16(unsigned short bits, FromBitsT) noexcept { val = bits; }

   // Creates an instance whose representation is exactly the 16 bits in `bits`.
   static constexpr ORT_HOST_DEVICE BFloat16 FromBits(uint16_t bits) noexcept {
     return BFloat16(bits, FromBits());
   }

   // Converts a float to bfloat16.
   //  - CUDA >= 11 compiling device code for arch >= 800: uses the CUDA conversion intrinsics.
   //  - HIP: manual rounding via a union; NaN inputs map to 0x7FC0.
   //  - otherwise: NaN inputs map to kPositiveQNaNBits; other inputs get a rounding bias added
   //    to their 32-bit pattern and the upper 16 bits are kept.
   inline ORT_HOST_DEVICE BFloat16(float v) noexcept {
 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
     val = __bfloat16_as_ushort(__float2bfloat16(v));
 #elif defined(__HIP__)
     // We should be using memcpy in order to respect the strict aliasing rule but it fails in the HIP environment.
     if (v != v) {  // isnan
       val = UINT16_C(0x7FC0);
     } else {
       union {
         uint32_t U32;
         float F32;
       };

       F32 = v;
       uint32_t rounding_bias = ((U32 >> 16) & 1) + UINT32_C(0x7FFF);
       val = static_cast<uint16_t>((U32 + rounding_bias) >> 16);
     }
 #else

     // Use C isnan to work both in host and device
     if (::isnan(v)) {
       val = kPositiveQNaNBits;
     } else {
       static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");

       // Copy the object representation into an integer instead of punning through a union:
       // reading the inactive union member is undefined behaviour in C++, and memcpy is
       // already relied upon in this branch.
       uint32_t bits;
       std::memcpy(&bits, &v, sizeof(bits));

       // Add the rounding bias (kRoundToNearest plus the lowest bit that will be kept) and
       // keep the upper half. All of this stays in the integer domain: once biased, the
       // pattern is no longer a meaningful float (it can even have a NaN-shaped encoding),
       // so it should not be handed around as a float value.
       bits += ((bits >> 16) & 1u) + kRoundToNearest;
       val = static_cast<uint16_t>(bits >> 16);
     }
 #endif
   }

   // Converts the stored bfloat16 bits to float.
   //  - CUDA >= 11: uses the CUDA conversion intrinsic.
   //  - HIP: places the 16 bits in the upper half of a 32-bit word and reinterprets it.
   //  - otherwise: NaN values yield std::numeric_limits<float>::quiet_NaN(); other values are
   //    copied into the most significant half of a zero-initialized float.
   inline ORT_HOST_DEVICE float ToFloat() const noexcept {
 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
     return __bfloat162float(*reinterpret_cast<const __nv_bfloat16*>(&val));
 #elif defined(__HIP__)
     // We should be using memcpy in order to respect the strict aliasing rule but it fails in the HIP environment.
     float result = 0;
     uint32_t tmp = val;
     tmp <<= 16;
     float* tempRes = reinterpret_cast<float*>(&tmp);
     result = *tempRes;
     return result;
 #else

     if (IsNaNHostDevice()) {
       return std::numeric_limits<float>::quiet_NaN();
     }

     float result = 0;
     char* const first = reinterpret_cast<char*>(&result);
     // The bfloat16 bits form the most significant half of the float; where that half
     // sits in memory depends on the byte order.
     if constexpr (endian::native == endian::little) {
       char* const second = first + sizeof(uint16_t);
       std::memcpy(second, &val, sizeof(uint16_t));
     } else {
       std::memcpy(first, &val, sizeof(uint16_t));
     }
     return result;
 #endif
   }

   // Frequently used constants. They are only declared here; their definitions
   // are not part of this header section.
   static const BFloat16 NaN;
   static const BFloat16 NegativeNaN;
   static const BFloat16 Infinity;
   static const BFloat16 NegativeInfinity;
   static const BFloat16 Epsilon;
   static const BFloat16 MinValue;
   static const BFloat16 MaxValue;
   static const BFloat16 Zero;
   static const BFloat16 One;
   static const BFloat16 MinusOne;

   // Helpers re-exported from the base implementation.
   using Base::IsNegative;

   using Base::IsNaN;

   using Base::IsFinite;

   using Base::IsPositiveInfinity;

   using Base::IsNegativeInfinity;

   using Base::IsInfinity;

   using Base::IsNaNOrZero;

   using Base::IsNormal;

   using Base::IsSubnormal;

   using Base::Abs;

   using Base::Negate;

   // Implicit conversion to float; same result as ToFloat().
   ORT_HOST_DEVICE operator float() const noexcept { return ToFloat(); }

 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
   // Interop with the CUDA bfloat16 type: both directions reinterpret the 16-bit storage.
   ORT_HOST_DEVICE BFloat16(const __nv_bfloat16& value) { val = *reinterpret_cast<const unsigned short*>(&value); }
   explicit ORT_HOST_DEVICE operator __nv_bfloat16() const { return *reinterpret_cast<const __nv_bfloat16*>(&val); }
 #endif

   // Equality on the stored bits, except that NaN never compares equal.
   ORT_HOST_DEVICE bool operator==(const BFloat16& rhs) const noexcept {
     if (IsNaNHostDevice() || rhs.IsNaNHostDevice()) {
       // IEEE defines that NaN is not equal to anything, including itself.
       return false;
     }
     return val == rhs.val;
   }

   // Negation of operator==; consequently true whenever either operand is NaN.
   ORT_HOST_DEVICE bool operator!=(const BFloat16& rhs) const noexcept {
     return !(*this == rhs);
   }

   // Ordering implemented directly on the bit patterns.
   ORT_HOST_DEVICE bool operator<(const BFloat16& rhs) const noexcept {
     if (IsNaNHostDevice() || rhs.IsNaNHostDevice()) {
       // IEEE defines that NaN is unordered with respect to everything, including itself.
       return false;
     }

     const bool left_is_negative = IsNegativeHostDevice();
     if (left_is_negative != rhs.IsNegativeHostDevice()) {
       // When the signs of left and right differ, we know that left is less than right if it is
       // the negative value. The exception to this is if both values are zero, in which case IEEE
       // says they should be equal, even if the signs differ.
       return left_is_negative && !AreZeroHostDevice(*this, rhs);
     }
     // Same sign: the raw bit comparison gives the ordering for non-negative values and the
     // reversed ordering for negative ones, hence the XOR with the sign.
     return (val != rhs.val) && ((val < rhs.val) ^ left_is_negative);
   }

   // True when the sign bit is set.
   ORT_HOST_DEVICE bool IsNegativeHostDevice() const noexcept {
     return (val & kSignMask) != 0;
   }

   // True when the bits, with the sign cleared, are greater than the positive-infinity pattern.
   ORT_HOST_DEVICE bool IsNaNHostDevice() const noexcept {
     return static_cast<uint16_t>(val & ~kSignMask) > kPositiveInfinityBits;
   }

   // True when both operands are zero (of either sign).
   ORT_HOST_DEVICE static bool AreZeroHostDevice(const BFloat16Impl& lhs, const BFloat16Impl& rhs) noexcept {
     // IEEE defines that positive and negative zero are equal, this gives us a quick equality check
     // for two values by or'ing the private bits together and stripping the sign. They are both zero,
     // and therefore equivalent, if the resulting value is still zero.
     return static_cast<uint16_t>((lhs.val | rhs.val) & ~kSignMask) == 0;
   }
 };


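 // User-defined literal suffixes that make it easier to declare MLFloat16 and BFloat16

 // initializers. The integer suffixes take the raw 16-bit pattern, e.g. 10_f16 or 10_b16;

 // the floating-point suffixes (_fp16, _bfp16) convert the numeric value of the literal.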

 #if !defined(__CUDACC__) && !defined(__HIPCC__)

 // Literal suffix `_f16`: treats the integer literal as the raw 16-bit pattern of an
 // MLFloat16 (e.g. 10_f16). The value goes through narrow<uint16_t> before FromBits.
 // Spelled without whitespace between `""` and the suffix, since the spaced form is deprecated.
 inline MLFloat16 operator""_f16(unsigned long long int v) noexcept {
   return MLFloat16::FromBits(narrow<uint16_t>(v));
 }


 // Literal suffix `_fp16`: converts the numeric value of a floating-point literal to
 // MLFloat16 by first casting it to float.
 // Spelled without whitespace between `""` and the suffix, since the spaced form is deprecated.
 inline MLFloat16 operator""_fp16(long double v) noexcept {
   return MLFloat16(static_cast<float>(v));
 }


 // Literal suffix `_b16`: treats the integer literal as the raw 16-bit pattern of a
 // BFloat16 (e.g. 10_b16). The value goes through narrow<uint16_t> before FromBits.
 // Spelled without whitespace between `""` and the suffix, since the spaced form is deprecated.
 inline BFloat16 operator""_b16(unsigned long long int v) noexcept {
   return BFloat16::FromBits((narrow<uint16_t>(v)));
 }


 // Literal suffix `_bfp16`: converts the numeric value of a floating-point literal to
 // BFloat16 by first casting it to float.
 // Spelled without whitespace between `""` and the suffix, since the spaced form is deprecated.
 inline BFloat16 operator""_bfp16(long double v) noexcept {
   return BFloat16(static_cast<float>(v));
 }

 #endif


 inline void BFloat16ToFloat(const BFloat16* blf, float* flt, size_t size) noexcept {

   auto src = blf;

   auto d = flt;

   for (; size != 0; ++src, ++d, --size) {

     *d = src->ToFloat();

   }

 }


 // Converts `size` consecutive floats starting at `flt` to BFloat16 and writes them, in the
 // same order, to the `size` elements starting at `blf`. Nothing is read or written when
 // `size` is zero.
 // Declared noexcept to match BFloat16ToFloat: the only operation performed per element is
 // the BFloat16(float) constructor, which is itself noexcept.
 inline void FloatToBFloat16(const float* flt, BFloat16* blf, size_t size) noexcept {
   for (size_t i = 0; i < size; ++i) {
     blf[i] = BFloat16(flt[i]);
   }
 }


 }  // namespace onnxruntime

onnxruntime_float16.h

first
GLint first
Definition: glcorearb.h:405

onnxruntime::MLFloat16
Definition: float16.h:29

onnxruntime_float16::Float16Impl< MLFloat16 >::IsNaN
bool IsNaN() const noexcept
Tests if the value is NaN
Definition: onnxruntime_float16.h:100

onnxruntime_float16::Float16Impl< MLFloat16 >::Abs
MLFloat16 Abs() const noexcept
Creates an instance that represents absolute value.
Definition: onnxruntime_float16.h:171

onnxruntime::BFloat16::operator<
ORT_HOST_DEVICE bool operator<(const BFloat16 &rhs) const noexcept
Definition: float16.h:232

onnxruntime_float16::BFloat16Impl< BFloat16 >::Negate
BFloat16 Negate() const noexcept
Creates a new instance with the sign flipped.
Definition: onnxruntime_float16.h:468

onnxruntime::MLFloat16::NegativeInfinity
static const MLFloat16 NegativeInfinity
Definition: float16.h:47

v
const GLdouble * v
Definition: glcorearb.h:837

onnxruntime::MLFloat16::MLFloat16
MLFloat16(float v) noexcept
Definition: float16.h:42

onnxruntime_float16::Float16Impl< MLFloat16 >::IsSubnormal
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
Definition: onnxruntime_float16.h:160

value
GLsizei const GLfloat * value
Definition: glcorearb.h:824

onnxruntime::BFloat16ToFloat
void BFloat16ToFloat(const BFloat16 *blf, float *flt, size_t size) noexcept
Definition: float16.h:285

onnxruntime::BFloat16::MinusOne
static const BFloat16 MinusOne
Definition: float16.h:189

endian::little

onnxruntime::BFloat16::FromBitsT
Definition: float16.h:98

onnxruntime::BFloat16::NaN
static const BFloat16 NaN
Definition: float16.h:180

result
**But if you need a result
Definition: thread.h:613

onnxruntime::FloatToBFloat16
void FloatToBFloat16(const float *flt, BFloat16 *blf, size_t size)
Definition: float16.h:293

onnxruntime_float16::Float16Impl< MLFloat16 >::IsNegative
bool IsNegative() const noexcept
Checks if the value is negative
Definition: onnxruntime_float16.h:92

onnxruntime::MLFloat16::FromBits
static constexpr MLFloat16 FromBits(uint16_t x) noexcept
Definition: float16.h:38

onnxruntime::BFloat16::BFloat16
ORT_HOST_DEVICE BFloat16(float v) noexcept
Definition: float16.h:106

onnxruntime::BFloat16::IsNegativeHostDevice
ORT_HOST_DEVICE bool IsNegativeHostDevice() const noexcept
Definition: float16.h:248

onnxruntime_float16::Float16Impl< MLFloat16 >::ToUint16Impl
static constexpr uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation

onnxruntime_float16::Float16Impl< MLFloat16 >::IsFinite
bool IsFinite() const noexcept
Tests if the value is finite
Definition: onnxruntime_float16.h:108

common.h

onnxruntime::BFloat16::operator!=
ORT_HOST_DEVICE bool operator!=(const BFloat16 &rhs) const noexcept
Definition: float16.h:228

onnxruntime::BFloat16::AreZeroHostDevice
static ORT_HOST_DEVICE bool AreZeroHostDevice(const BFloat16Impl &lhs, const BFloat16Impl &rhs) noexcept
Definition: float16.h:256

onnxruntime_float16::BFloat16Impl< BFloat16 >::IsPositiveInfinity
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
Definition: onnxruntime_float16.h:407

onnxruntime::BFloat16::FromBits
static constexpr ORT_HOST_DEVICE FromBitsT FromBits() noexcept
Definition: float16.h:99

onnxruntime_float16::Float16Impl< MLFloat16 >::IsNormal
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
Definition: onnxruntime_float16.h:149

onnxruntime_float16::BFloat16Impl< BFloat16 >::IsFinite
bool IsFinite() const noexcept
Tests if the value is finite
Definition: onnxruntime_float16.h:399

float
IMATH_NAMESPACE::V2f float
Definition: ImfStandardAttributes.h:706

onnxruntime::MLFloat16::Epsilon
static const MLFloat16 Epsilon
Definition: float16.h:48

onnxruntime_float16::BFloat16Impl
Shared implementation between public and internal classes. CRTP pattern.
Definition: onnxruntime_float16.h:328

ORT_HOST_DEVICE
#define ORT_HOST_DEVICE
Definition: float16.h:25

onnxruntime_float16::BFloat16Impl< BFloat16 >::IsNegative
bool IsNegative() const noexcept
Checks if the value is negative
Definition: onnxruntime_float16.h:383

onnxruntime::BFloat16::IsNaNHostDevice
ORT_HOST_DEVICE bool IsNaNHostDevice() const noexcept
Definition: float16.h:252

endian.h

onnxruntime::MLFloat16::MLFloat16
MLFloat16()=default

onnxruntime::BFloat16::ToFloat
ORT_HOST_DEVICE float ToFloat() const noexcept
Definition: float16.h:151

onnxruntime::BFloat16::operator==
ORT_HOST_DEVICE bool operator==(const BFloat16 &rhs) const noexcept
Definition: float16.h:220

onnxruntime::BFloat16::BFloat16
constexpr ORT_HOST_DEVICE BFloat16(unsigned short bits, FromBitsT) noexcept
Definition: float16.h:100

onnxruntime_float16::BFloat16Impl< BFloat16 >::IsNegativeInfinity
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
Definition: onnxruntime_float16.h:415

onnxruntime_float16::Float16Impl< MLFloat16 >::ToFloatImpl
float ToFloatImpl() const noexcept
Converts float16 to float

onnxruntime_float16::Float16Impl< MLFloat16 >::IsPositiveInfinity
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
Definition: onnxruntime_float16.h:116

onnxruntime_float16::BFloat16Impl< BFloat16 >::kPositiveQNaNBits
static constexpr uint16_t kPositiveQNaNBits
Definition: onnxruntime_float16.h:365

onnxruntime_float16::BFloat16Impl< BFloat16 >::BFloat16Impl
BFloat16Impl()=default

onnxruntime::BFloat16::Infinity
static const BFloat16 Infinity
Definition: float16.h:182

onnxruntime_float16::Float16Impl< MLFloat16 >::Negate
MLFloat16 Negate() const noexcept
Creates a new instance with the sign flipped.
Definition: onnxruntime_float16.h:177

x
GLint GLenum GLint x
Definition: glcorearb.h:409

onnxruntime::BFloat16::MaxValue
static const BFloat16 MaxValue
Definition: float16.h:186

onnxruntime::BFloat16::NegativeNaN
static const BFloat16 NegativeNaN
Definition: float16.h:181

onnxruntime_float16::BFloat16Impl< BFloat16 >::IsNaNOrZero
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
Definition: onnxruntime_float16.h:431

onnxruntime_float16::Float16Impl< MLFloat16 >::IsInfinity
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Definition: onnxruntime_float16.h:132

onnxruntime::MLFloat16::Infinity
static const MLFloat16 Infinity
Definition: float16.h:46

onnxruntime::MLFloat16::NegativeNaN
static const MLFloat16 NegativeNaN
Definition: float16.h:45

onnxruntime_float16::BFloat16Impl< BFloat16 >::Abs
BFloat16 Abs() const noexcept
Creates an instance that represents absolute value.
Definition: onnxruntime_float16.h:462

onnxruntime_float16::BFloat16Impl< BFloat16 >::IsSubnormal
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
Definition: onnxruntime_float16.h:451

onnxruntime::BFloat16
Definition: float16.h:89

narrow.h

size
GLsizeiptr size
Definition: glcorearb.h:664

onnxruntime::MLFloat16::Zero
static const MLFloat16 Zero
Definition: float16.h:51

onnxruntime_float16::BFloat16Impl< BFloat16 >::kPositiveInfinityBits
static constexpr uint16_t kPositiveInfinityBits
Definition: onnxruntime_float16.h:363

onnxruntime_float16::BFloat16Impl< BFloat16 >::IsNaN
bool IsNaN() const noexcept
Tests if the value is NaN
Definition: onnxruntime_float16.h:391

onnxruntime::BFloat16::One
static const BFloat16 One
Definition: float16.h:188

onnxruntime::MLFloat16::MinusOne
static const MLFloat16 MinusOne
Definition: float16.h:53

onnxruntime::BFloat16::BFloat16
BFloat16()=default

onnxruntime::BFloat16::NegativeInfinity
static const BFloat16 NegativeInfinity
Definition: float16.h:183

val
GLuint GLfloat * val
Definition: glcorearb.h:1608

onnxruntime_float16::BFloat16Impl< BFloat16 >::kRoundToNearest
static constexpr uint16_t kRoundToNearest
Definition: onnxruntime_float16.h:371

onnxruntime_float16::BFloat16Impl< BFloat16 >::kSignMask
static constexpr uint16_t kSignMask
Definition: onnxruntime_float16.h:361

onnxruntime::MLFloat16::One
static const MLFloat16 One
Definition: float16.h:52

onnxruntime::MLFloat16::MaxValue
static const MLFloat16 MaxValue
Definition: float16.h:50

onnxruntime_float16::BFloat16Impl< BFloat16 >::IsNormal
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
Definition: onnxruntime_float16.h:440

endian::native

onnxruntime_float16::BFloat16Impl< BFloat16 >::val
uint16_t val
Definition: onnxruntime_float16.h:375

onnxruntime_float16::Float16Impl< MLFloat16 >::IsNaNOrZero
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
Definition: onnxruntime_float16.h:140

value
Definition: core.h:1131

onnxruntime::MLFloat16::ToFloat
float ToFloat() const noexcept
Definition: float16.h:57

onnxruntime::BFloat16::Epsilon
static const BFloat16 Epsilon
Definition: float16.h:184

onnxruntime::BFloat16::FromBits
static constexpr ORT_HOST_DEVICE BFloat16 FromBits(uint16_t bits) noexcept
Definition: float16.h:102

onnxruntime::MLFloat16::NaN
static const MLFloat16 NaN
Definition: float16.h:44

onnxruntime_float16::BFloat16Impl< BFloat16 >::IsInfinity
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Definition: onnxruntime_float16.h:423

onnxruntime_float16::Float16Impl
Shared implementation between public and internal classes. CRTP pattern.
Definition: onnxruntime_float16.h:39

onnxruntime::MLFloat16::MinValue
static const MLFloat16 MinValue
Definition: float16.h:49

onnxruntime_float16::Float16Impl< MLFloat16 >::IsNegativeInfinity
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
Definition: onnxruntime_float16.h:124

onnxruntime::BFloat16::MinValue
static const BFloat16 MinValue
Definition: float16.h:185

onnxruntime::BFloat16::Zero
static const BFloat16 Zero
Definition: float16.h:187

src
GLenum src
Definition: glcorearb.h:1793