6 #if !defined(DISABLE_FLOAT8_TYPES)
9 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11080
13 #if !defined(__CUDACC__) && !defined(__HIPCC__)
19 namespace onnxruntime {
21 #if defined(__CUDACC__) || defined(__HIPCC__)
22 #define ORT_HOST_DEVICE __host__ __device__
24 #define ORT_HOST_DEVICE
40 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11080
41 val = __nv_cvt_float_to_fp8(v,
saturate ? __NV_SATFINITE : __NV_NOSAT, __NV_E4M3);
44 std::memcpy(&b, &v,
sizeof(b));
46 val =
static_cast<uint8_t
>((b & 0x80000000) >> 24);
47 if ((b & 0x7fffffff) == 0x7f800000) {
53 }
else if ((b & 0x7F800000) == 0x7F800000) {
56 uint8_t e =
static_cast<uint8_t
>((b & 0x7F800000) >> 23);
57 uint32_t m =
static_cast<uint32_t
>(b & 0x007FFFFF);
69 auto mask = 1 << (20 + d);
70 if ((m &
mask) && ((
val & 1) || ((m & (mask - 1)) > 0) || ((m &
mask) && (m & (mask << 1)) && ((m & (mask - 1)) == 0)))) {
83 if ((
val & 0x7F) == 0x7F) {
87 if ((m & 0x80000) && ((m & 0x100000) || (m & 0x7FFFF))) {
88 if ((
val & 0x7F) < 0x7E) {
106 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11080
107 return __half2float(__nv_cvt_fp8_to_halfraw(
val, __NV_E4M3));
112 }
else if (
val == 127) {
115 uint32_t expo = (
val & 0x78) >> 3;
116 uint32_t mant =
val & 0x07;
122 if ((mant & 0x4) == 0) {
127 if ((mant & 0x4) == 0) {
132 res |= (mant & 0x3) << 21;
143 std::memcpy(&float_res, &res,
sizeof(
float));
150 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11080
152 explicit ORT_HOST_DEVICE operator __nv_fp8_e4m3()
const {
return *
reinterpret_cast<const __nv_fp8_e4m3*
>(&
val); }
162 #if !defined(__CUDACC__) && !defined(__HIPCC__)
177 for (; size != 0; ++
src, ++d, --
size) {
185 for (; size != 0; ++
src, ++d, --
size) {
206 std::memcpy(&b, &v,
sizeof(b));
208 val =
static_cast<uint8_t
>((b & 0x80000000) >> 24);
209 if ((b & 0x7fffffff) == 0x7f800000) {
217 }
else if ((b & 0x7F800000) == 0x7F800000) {
220 uint8_t e =
static_cast<uint8_t
>((b & 0x7F800000) >> 23);
221 uint32_t m =
static_cast<uint32_t
>(b & 0x007FFFFF);
224 }
else if (e < 120) {
229 val |= m >> (21 + d);
233 auto mask = 1 << (20 + d);
234 if ((m &
mask) && ((
val & 1) || ((m & (mask - 1)) > 0) || ((m &
mask) && (m & (mask << 1)) && ((m & (mask - 1)) == 0)))) {
238 }
else if (e < 135) {
248 if ((m & 0x80000) && ((m & 0x100000) || (m & 0x7FFFF))) {
249 if ((
val & 0x7F) < 0x7F) {
274 uint32_t expo = (
val & 0x78) >> 3;
275 uint32_t mant =
val & 0x07;
281 if ((mant & 0x4) == 0) {
286 if ((mant & 0x4) == 0) {
291 res |= (mant & 0x3) << 21;
302 std::memcpy(&float_res, &res,
sizeof(
float));
315 #if !defined(__CUDACC__) && !defined(__HIPCC__)
330 for (; size != 0; ++
src, ++d, --
size) {
338 for (; size != 0; ++
src, ++d, --
size) {
357 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11080
358 val = __nv_cvt_float_to_fp8(v,
saturate ? __NV_SATFINITE : __NV_NOSAT, __NV_E5M2);
361 std::memcpy(&b, &v,
sizeof(b));
363 val = (b & 0x80000000) >> 24;
364 if ((b & 0x7FFFFFFF) == 0x7F800000) {
372 }
else if ((b & 0x7F800000) == 0x7F800000) {
375 uint32_t e = (b & 0x7F800000) >> 23;
376 uint32_t m = b & 0x007FFFFF;
380 }
else if (e < 113) {
385 val |= m >> (22 + d);
389 auto mask = 1 << (21 + d);
390 if ((m &
mask) && ((
val & 1) || ((m & (mask - 1)) > 0) || ((m &
mask) && (m & (mask << 1)) && ((m & (mask - 1)) == 0)))) {
394 }
else if (e < 143) {
398 if ((m & 0x100000) && ((m & 0xFFFFF) || (m & 0x200000))) {
399 if ((
val & 0x7F) < 0x7B) {
419 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11080
420 return __half2float(__nv_cvt_fp8_to_halfraw(
val, __NV_E5M2));
425 }
else if (
val >= 125 &&
val <= 127) {
427 }
else if (
val == 252) {
429 }
else if (
val == 124) {
432 uint32_t expo = (
val & 0x7C) >> 2;
433 uint32_t mant =
val & 0x03;
439 if ((mant & 0
x2) == 0) {
444 res |= (mant & 0x1) << 22;
456 std::memcpy(&float_res, &res,
sizeof(
float));
463 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11080
465 explicit ORT_HOST_DEVICE operator __nv_fp8_e5m2()
const {
return *
reinterpret_cast<const __nv_fp8_e5m2*
>(&
val); }
475 #if !defined(__CUDACC__) && !defined(__HIPCC__)
477 inline Float8E5M2 operator"" _f8e5m2fn(
unsigned long long int v) {
482 return Float8E5M2(static_cast<float>(v),
true);
490 for (; size != 0; ++
src, ++d, --
size) {
498 for (; size != 0; ++
src, ++d, --
size) {
519 std::memcpy(&b, &v,
sizeof(b));
521 val = (b & 0x80000000) >> 24;
522 if ((b & 0x7FFFFFFF) == 0x7F800000) {
528 }
else if ((b & 0x7F800000) == 0x7F800000) {
531 uint32_t e = (b & 0x7F800000) >> 23;
532 uint32_t m = b & 0x007FFFFF;
536 }
else if (e < 112) {
541 val |= m >> (22 + d);
545 auto mask = 1 << (21 + d);
546 if ((m &
mask) && ((
val & 1) || ((m & (mask - 1)) > 0) || ((m &
mask) && (m & (mask << 1)) && ((m & (mask - 1)) == 0)))) {
550 }
else if (e < 143) {
555 if ((m & 0x100000) && ((m & 0xFFFFF) || (m & 0x200000))) {
556 if ((
val & 0x7F) < 0x7F) {
563 }
else if ((e == 255) && (m == 0)) {
583 uint32_t expo = (
val & 0x7C) >> 2;
584 uint32_t mant =
val & 0x03;
590 if ((mant & 0
x2) == 0) {
595 res |= (mant & 0x1) << 22;
607 std::memcpy(&float_res, &res,
sizeof(
float));
620 #if !defined(__CUDACC__) && !defined(__HIPCC__)
635 for (; size != 0; ++
src, ++d, --
size) {
643 for (; size != 0; ++
src, ++d, --
size) {
650 #endif // DISABLE_FLOAT8_TYPES
void FloatToFloat8E4M3FNUZ(const float *flt, Float8E4M3FNUZ *blf, size_t size, bool saturate)
static constexpr ORT_HOST_DEVICE FromBitsT FromBits()
GLsizei const GLfloat * value
ORT_HOST_DEVICE bool operator<(const Float8E4M3FN &left, const Float8E4M3FN &right)
ORT_HOST_DEVICE float ToFloat() const
ORT_HOST_DEVICE float ToFloat() const
void Float8E4M3FNUZToFloat(const Float8E4M3FNUZ *blf, float *flt, size_t size)
ORT_HOST_DEVICE Float8E4M3FNUZ(float v, bool saturate=true)
static constexpr ORT_HOST_DEVICE FromBitsT FromBits()
static constexpr ORT_HOST_DEVICE FromBitsT FromBits()
constexpr ORT_HOST_DEVICE Float8E4M3FN(unsigned char bits, FromBitsT)
ORT_HOST_DEVICE bool operator==(const Float8E4M3FN &left, const Float8E4M3FN &right)
ORT_HOST_DEVICE bool operator!=(const Float8E4M3FN &left, const Float8E4M3FN &right)
void FloatToFloat8E4M3FN(const float *flt, Float8E4M3FN *blf, size_t size, bool saturate)
ORT_HOST_DEVICE Float8E5M2FNUZ(float v, bool saturate=true)
IMATH_NAMESPACE::V2f float
constexpr ORT_HOST_DEVICE Float8E4M3FNUZ(unsigned char bits, FromBitsT)
ORT_HOST_DEVICE Float8E5M2(float v, bool saturate=true)
ImageBuf OIIO_API saturate(const ImageBuf &src, float scale=0.0f, int firstchannel=0, ROI roi={}, int nthreads=0)
IMATH_HOSTDEVICE constexpr int sign(T a) IMATH_NOEXCEPT
GLboolean GLboolean GLboolean b
static constexpr ORT_HOST_DEVICE FromBitsT FromBits()
void FloatToFloat8E5M2(const float *flt, Float8E5M2 *blf, size_t size, bool saturate)
ORT_HOST_DEVICE Float8E4M3FN(float v, bool saturate=true)
void Float8E5M2ToFloat(const Float8E5M2 *blf, float *flt, size_t size)
void FloatToFloat8E5M2FNUZ(const float *flt, Float8E5M2FNUZ *blf, size_t size, bool saturate)
ORT_HOST_DEVICE float ToFloat() const
constexpr ORT_HOST_DEVICE Float8E5M2(unsigned char bits, FromBitsT)
void Float8E4M3FNToFloat(const Float8E4M3FN *blf, float *flt, size_t size)
void Float8E5M2FNUZToFloat(const Float8E5M2FNUZ *blf, float *flt, size_t size)
constexpr ORT_HOST_DEVICE Float8E5M2FNUZ(unsigned char bits, FromBitsT)
ORT_HOST_DEVICE float ToFloat() const