30 #if defined(FORCE_NON_SIMD)
33 #if defined(LINUX) && SYS_IS_GCC_GE(3, 4) && defined(__SSE2__)
37 #elif defined(MBSD_INTEL)
56 SYS_STATIC_ASSERT_MSG(SYSisPOD<v4uu>(),
"v4uu should be POD, for better performance in UT_Array, etc.");
84 { *
this = (c &
val) | ((!c) & *
this); }
90 {
return ~(*
this ==
v); }
96 {
return ~(*
this <
v); }
98 {
return ~(*
this >
v); }
122 return v4uu((*
this)[0] * r[0],
141 return v4uu((*
this)[0] % r[0],
148 return v4uu((*
this)[0] % r,
166 {
return *
this ==
v4uu(0); }
171 {
return *
this ^
v4uu(0xFFFFFFFF); }
182 operator v4uf()
const;
197 SYS_STATIC_ASSERT_MSG(SYSisPOD<v4uf>(),
"v4uf should be POD, for better performance in UT_Array, etc.");
223 { *
this = (val &
c) | (*
this & ~c); }
294 {
return *
this ^ v4uu(0xFFFFFFFF); }
334 template <
int A,
int B,
int C,
int D>
337 return VM_SHUFFLE<A,B,C,D>(
vector);
389 bitCastIntToFloat(
const v4uu &v) {
return V4SF(v.
vector); }
414 andn(
const v4uu &a,
const v4uf &
b)
420 andn(
const v4uu &a,
const v4uu &b)
427 ternary(
const v4uu &a,
const v4uf &b,
const v4uf &
c)
429 return (b & a) | andn(a, c);
433 ternary(
const v4uu &a,
const v4uu &b,
const v4uu &c)
435 return (b & a) | andn(a, c);
440 nand(
const v4uu &a,
const v4uu &b)
460 return vmax(vmin(a, c), b);
466 return vmax(vmin(a,
v4uf(c)),
v4uf(b));
475 signbits(
const v4uu &a)
477 return vm_signbits(a.
vector);
481 signbits(
const v4uf &a)
483 return vm_signbits(a.
vector);
487 allbits(
const v4uu &a)
489 return vm_allbits(a.
vector);
493 anybits(
const v4uu &a)
505 madd(
const v4uf &v,
float f,
float a)
519 return madd(v, f, -s);
525 return madd(v, f, -s);
532 return madd(a, w1, b*w);
537 float rw,
float gw,
float bw)
546 return res[0] + res[1] + res[2];
553 return res[0] + res[1] + res[2] + res[3];
559 return SYSsqrt(
dot3(a, a));
571 return v4uf(a[1]*b[2] - a[2]*b[1],
572 a[2]*b[0] - a[0]*b[2],
573 a[0]*b[1] - a[1]*b[0], 0);
577 #if defined(AMD64) && (defined(__SSE4_1__) || defined(_MSC_VER))
578 #include <smmintrin.h>
580 VMconvert4F32ToF16(
v4si input)
583 __m128i sign_mask = _mm_set1_epi32(0x80000000);
584 __m128i
sign = _mm_and_si128(sign_mask, input);
585 __m128i positive = _mm_andnot_si128(sign_mask, input);
588 sign = _mm_srli_epi32(sign,16);
591 __m128i min_value = _mm_set1_epi32((127-15)<<23);
592 __m128i
max_value = _mm_set1_epi32((127+16)<<23);
593 positive = _mm_max_epi32(positive, min_value);
594 positive = _mm_min_epi32(positive, max_value);
597 __m128i bit0_mask = _mm_set1_epi32(1);
598 __m128i round_bit = _mm_srli_epi32(positive, (23-10-1));
599 round_bit = _mm_and_si128(round_bit, bit0_mask);
603 __m128i exponent_diff = _mm_set1_epi32(127-15);
604 __m128i exponent = _mm_srli_epi32(positive, 23);
605 exponent = _mm_sub_epi32(exponent, exponent_diff);
606 exponent = _mm_slli_epi32(exponent, 10);
609 __m128i ten_bits_mask = _mm_set1_epi32(0x3FF);
610 positive = _mm_srli_epi32(positive, 23-10);
611 positive = _mm_and_si128(positive, ten_bits_mask);
614 __m128i f16s = _mm_or_si128(exponent, positive);
615 f16s = _mm_or_si128(f16s, sign);
618 f16s = _mm_add_epi32(f16s, round_bit);
623 f16s = _mm_packus_epi32(f16s,f16s);
626 return _mm_cvtsi128_si64(f16s);
630 VMconvert4F32ToF16(
v4si input)
636 for (
int i = 0; i < 4; ++i)
643 VMconvert4F32ToF16(
v4sf input)
645 return VMconvert4F32ToF16(
V4SI(input));
651 SYSmin(
const v4uu &a,
const v4uu &b)
653 return ternary(a < b, a, b);
656 SYSmax(
const v4uu &a,
const v4uu &b)
658 return ternary(a > b, a, b);
661 SYSclamp(
const v4uu &a,
const v4uu &b,
const v4uu &c)
666 SYSclamp(
const v4uu &a,
int b,
int c)
687 return madd(diff, t, a);
694 return madd(diff, t, a);
699 return vmax(vmin(a, c), b);
705 return vmax(vmin(a,
v4uf(c)),
v4uf(b));
733 SYSfastFloor(
const v4uf &a)
/// VM_ALIGN(ptr, ASIZE, STYPE)
///
/// Evaluates to the number of STYPE-sized elements between the address held
/// in `ptr` and the next address that is a multiple of (1 << ASIZE) bytes.
/// The result is 0 when `ptr` is already on such a boundary.
///
/// @param ptr    Pointer (or pointer-valued expression) to measure from.
/// @param ASIZE  log2 of the alignment in bytes (e.g. 4 for 16 bytes).
/// @param STYPE  Element type; the byte distance is divided by sizeof(STYPE),
///               truncating if it is not a whole number of elements.
///
/// Every macro argument is parenthesized in the expansion so that compound
/// arguments such as `base + offset` are cast/shifted as a whole instead of
/// being split by operator precedence.
#define VM_ALIGN(ptr, ASIZE, STYPE)	\
		((((1<<(ASIZE))-(intptr_t)(ptr))&((1<<(ASIZE))-1))/sizeof(STYPE))
constexpr auto max_value() -> T
SYS_FORCE_INLINE v4uf operator/(float r) const
SYS_FORCE_INLINE v4uu operator=(const v4uu &v)
SYS_FORCE_INLINE v4uf operator+=(float r)
Mat3< typename promote< S, T >::type > operator*(S scalar, const Mat3< T > &m)
Multiply each element of the given matrix by scalar and return the result.
SYS_API double cos(double x)
SYS_FORCE_INLINE v4uf operator&&(const v4uf &r) const
SYS_FORCE_INLINE v4uu operator>=(const v4uu &v) const
SYS_FORCE_INLINE v4uu operator=(v4si v)
SYS_FORCE_INLINE v4uf operator^(const v4uu &r) const
SYS_FORCE_INLINE v4uf(const v4uf &v) noexcept
#define SYS_STATIC_ASSERT_MSG(expr, msg)
SYS_FORCE_INLINE v4uu operator||(const v4uu &r) const
SYS_FORCE_INLINE v4uf operator-() const
#define VM_SHIFTRIGHT(A, C)
SYS_FORCE_INLINE v4uu operator&&(const v4uu &r) const
Mat3< typename promote< T0, T1 >::type > operator+(const Mat3< T0 > &m0, const Mat3< T1 > &m1)
Add corresponding elements of m0 and m1 and return the result.
SYS_FORCE_INLINE v4uu operator-(const v4uu &r) const
SYS_FORCE_INLINE v4uu operator*(int32 r) const
virtual bool lerp(GA_AttributeOperand &d, GA_AttributeOperand &a, GA_AttributeOperand &b, GA_AttributeOperand &t) const
d = SYSlerp(a, b, t);
SYS_FORCE_INLINE v4uu toUnsignedInt() const
This is a lie, it is a signed int.
vfloat4 sqrt(const vfloat4 &a)
SYS_FORCE_INLINE v4uu(const int32 v[4])
SYS_FORCE_INLINE v4uu operator%=(const v4uu &r)
SYS_FORCE_INLINE v4uf operator=(v4sf v) noexcept
OIIO_HOSTDEVICE void sincos(float x, float *sine, float *cosine)
SYS_FORCE_INLINE v4uu operator*=(int32 r)
SYS_FORCE_INLINE v4uu operator+(const v4uu &r) const
GLboolean GLboolean GLboolean GLboolean a
SYS_FORCE_INLINE v4uf operator=(float v)
SYS_FORCE_INLINE v4uu operator==(const v4uu &v) const
GLuint GLsizei GLsizei * length
SYS_FORCE_INLINE v4uu operator<(const v4uu &v) const
SYS_FORCE_INLINE v4uu(int32 v)
SYS_FORCE_INLINE v4uu operator%(const v4uu &r) const
SYS_FORCE_INLINE void operator|=(const v4uu &r)
OIIO_FORCEINLINE OIIO_HOSTDEVICE float msub(float a, float b, float c)
Fused multiply and subtract: (a*b - c)
**But if you need a result
SYS_FORCE_INLINE void condAssign(const v4uu &val, const v4uu &c)
UT_Matrix2T< T > SYSlerp(const UT_Matrix2T< T > &v1, const UT_Matrix2T< T > &v2, S t)
SYS_FORCE_INLINE v4uu operator!() const
SYS_FORCE_INLINE v4uf operator-(float r) const
SYS_FORCE_INLINE v4uu operator+=(int32 r)
float dot3(const vfloat4 &a, const vfloat4 &b)
Return the float 3-component dot (inner) product of a and b.
SYS_FORCE_INLINE v4uf abs() const
SYS_FORCE_INLINE v4uf operator|(const v4uf &r) const
SYS_FORCE_INLINE v4uf operator*=(float r)
SYS_FORCE_INLINE void store(float v[4]) const
SYS_FORCE_INLINE v4uf operator/(const v4uf &r) const
SYS_FORCE_INLINE v4uf(float a, float b, float c, float d)
SYS_FORCE_INLINE v4uf recip() const
SYS_FORCE_INLINE v4uu operator&(const v4uu &r) const
Mat3< typename promote< T0, T1 >::type > operator-(const Mat3< T0 > &m0, const Mat3< T1 > &m1)
Subtract corresponding elements of m0 and m1 and return the result.
SYS_FORCE_INLINE v4uu operator+=(const v4uu &r)
SYS_FORCE_INLINE v4uu operator^(const v4uu &r) const
SYS_FORCE_INLINE v4uu operator=(int32 v)
SYS_FORCE_INLINE v4uf() noexcept=default
SYS_FORCE_INLINE v4uu(const v4si &v)
SYS_FORCE_INLINE v4uu toSignedInt() const
SYS_FORCE_INLINE v4uf operator&(const v4uf &r) const
SYS_FORCE_INLINE v4uu operator<=(const v4uf &v) const
SYS_FORCE_INLINE v4uf operator=(const v4uf &v) noexcept
SYS_FORCE_INLINE v4uf operator!() const
UT_Vector3T< T > SYSclamp(const UT_Vector3T< T > &v, const UT_Vector3T< T > &min, const UT_Vector3T< T > &max)
SYS_FORCE_INLINE v4uf operator||(const v4uf &r) const
SYS_FORCE_INLINE v4uu operator>(const v4uf &v) const
SYS_FORCE_INLINE void setComp(int idx, float v)
SYS_FORCE_INLINE void store(int32 v[4]) const
OIIO_FORCEINLINE OIIO_HOSTDEVICE float madd(float a, float b, float c)
Fused multiply and add: (a*b + c)
SYS_FORCE_INLINE v4uf(const float v[4])
SYS_FORCE_INLINE v4uf clamp(float low, float high) const
SYS_FORCE_INLINE v4uu operator==(const v4uf &v) const
SYS_FORCE_INLINE v4uu operator*(const v4uu &r) const
SYS_FORCE_INLINE v4uu operator%(int r) const
SYS_FORCE_INLINE v4uu operator-=(int32 r)
SYS_API fpreal32 SYSfloor(fpreal32 val)
SYS_FORCE_INLINE v4uf operator^(const v4uf &r) const
SYS_FORCE_INLINE v4uu operator-(int32 r) const
SYS_FORCE_INLINE float operator[](int idx) const
SYS_FORCE_INLINE void operator^=(const v4uu &r)
IMATH_HOSTDEVICE constexpr int sign(T a) IMATH_NOEXCEPT
SYS_FORCE_INLINE void condAssign(const v4uf &val, const v4uu &c)
SYS_FORCE_INLINE v4uu operator<(const v4uf &v) const
GLboolean GLboolean GLboolean b
SYS_FORCE_INLINE v4uu operator!=(const v4uu &v) const
SYS_FORCE_INLINE v4uf operator*(float r) const
SYS_FORCE_INLINE v4uu operator>=(const v4uf &v) const
SYS_FORCE_INLINE v4uu operator|(const v4uu &r) const
SYS_FORCE_INLINE v4uf operator*(const v4uf &r) const
SYS_FORCE_INLINE v4uf operator-(const v4uf &r) const
SYS_FORCE_INLINE void setComp(int idx, int32 v)
SYS_FORCE_INLINE v4uf operator&(const v4uu &r) const
#define VM_SHIFTLEFT(A, C)
SYS_API double tan(double x)
SYS_FORCE_INLINE v4uu operator%=(int r)
SYS_FORCE_INLINE v4uu operator-=(const v4uu &r)
SYS_FORCE_INLINE v4uu operator<=(const v4uu &v) const
SYS_FORCE_INLINE v4uu operator>(const v4uu &v) const
SYS_FORCE_INLINE v4uf operator&&(const v4uu &r) const
SYS_FORCE_INLINE v4uf clamp(const v4uf &low, const v4uf &high) const
SYS_FORCE_INLINE v4uu(int32 a, int32 b, int32 c, int32 d)
SYS_FORCE_INLINE v4uf swizzle() const
SYS_FORCE_INLINE v4uu operator<<(int32 c) const
GLubyte GLubyte GLubyte GLubyte w
SYS_FORCE_INLINE int32 operator[](int idx) const
SYS_FORCE_INLINE v4uu operator+(int32 r) const
SYS_FORCE_INLINE v4uf operator/=(float r)
SYS_FORCE_INLINE v4uf(float v)
SYS_FORCE_INLINE v4uu operator!=(const v4uf &v) const
SYS_FORCE_INLINE v4uf operator-=(const v4uf &r)
SYS_FORCE_INLINE v4uf operator~() const
SYS_FORCE_INLINE void operator&=(const v4uu &r)
SYS_FORCE_INLINE v4uf operator||(const v4uu &r) const
SYS_FORCE_INLINE v4uf operator+(float r) const
SYS_FORCE_INLINE v4uf operator*=(const v4uf &r)
SYS_FORCE_INLINE v4uf operator+=(const v4uf &r)
SYS_FORCE_INLINE v4uu operator>>(int32 c) const
IMATH_HOSTDEVICE constexpr Quat< T > operator/(const Quat< T > &q1, const Quat< T > &q2) IMATH_NOEXCEPT
Quaternion division.
SYS_FORCE_INLINE v4uf operator/=(const v4uf &r)
SYS_FORCE_INLINE v4uf operator-=(float r)
SYS_FORCE_INLINE v4uu(const v4uu &v)
SYS_FORCE_INLINE v4uf operator+(const v4uf &r) const
SYS_FORCE_INLINE v4uu operator~() const
SIM_DerVector3 cross(const SIM_DerVector3 &lhs, const SIM_DerVector3 &rhs)
SYS_FORCE_INLINE v4uf(const v4sf &v) noexcept
SYS_API double sin(double x)
constexpr T normalize(UT_FixedVector< T, D > &a) noexcept
SYS_FORCE_INLINE v4uu isFinite() const
SYS_FORCE_INLINE v4uf operator|(const v4uu &r) const
SYS_FORCE_INLINE v4uu operator*=(const v4uu &r)