// Shorthand for the storage class and return type (`static inline void`)
// used by the wrapper functions that the VM_DECL_* macros generate.
21 #define VM_SIV static inline void
// VM_SIMDFUNC / VM_SIMDFUNCR wrap the signature of a SIMD implementation.
// When CPU_HAS_SIMD_INSTR is defined they emit only a declaration
// (`signature;`). The second pair of definitions instead emits a stub:
// an empty body for VM_SIMDFUNC and a body returning 0 for VM_SIMDFUNCR.
// NOTE(review): the directive separating the two pairs (presumably #else)
// and the closing #endif are not visible in this listing -- confirm.
26 #if defined(CPU_HAS_SIMD_INSTR)
27 #define VM_SIMDFUNC(signature) signature;
28 #define VM_SIMDFUNCR(signature) signature;
30 #define VM_SIMDFUNC(signature) signature {}
31 #define VM_SIMDFUNCR(signature) signature { return 0; }
// Declares a float operation d = f(a, b) with vector `a` and vector `b`.
//   name - name of the generated function(s)
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// Generates:
//   * an fpreal32 overload that calls name##SIMD(d, a, b, num) when
//     `theSIMD` is true and otherwise runs the scalar loop over `expr`;
//   * an fpreal64 overload that always runs the scalar loop;
//   * the name##SIMD signature, wrapped in VM_SIMDFUNC.
34 #define VM_DECL_vFvFvF(name, expr) \
35 VM_SIV name(fpreal32 *d, const fpreal32 *a, \
36 const fpreal32 *b, exint num) { \
37 if (theSIMD) name##SIMD(d, a, b, num); \
38 else for (exint n=0; n<num; n++) expr; \
40 VM_SIV name(fpreal64 *d, const fpreal64 *a, \
41 const fpreal64 *b, exint num) { \
42 for (exint n=0; n<num; n++) expr; \
44 VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, exint num))
// Declares a float operation d = f(a, b) with vector `a` and scalar `b`.
//   name - name of the generated function(s)
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// Generates an fpreal32 overload (name##SIMD when `theSIMD` is true, scalar
// loop otherwise), an fpreal64 overload that always uses the scalar loop,
// and the name##SIMD signature wrapped in VM_SIMDFUNC.
46 #define VM_DECL_vFvFuF(name, expr) \
47 VM_SIV name(fpreal32 *d, const fpreal32 *a, \
48 fpreal32 b, exint num) { \
49 if (theSIMD) name##SIMD(d, a, b, num); \
50 else for (exint n=0; n<num; n++) expr; \
52 VM_SIV name(fpreal64 *d, const fpreal64 *a, \
53 fpreal64 b, exint num) { \
54 for (exint n=0; n<num; n++) expr; \
56 VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num))
// Declares a float operation d = f(a, b) with scalar `a` and vector `b`.
//   name - name of the generated function(s)
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// Generates an fpreal32 overload (name##SIMD when `theSIMD` is true, scalar
// loop otherwise), an fpreal64 overload that always uses the scalar loop,
// and the name##SIMD signature wrapped in VM_SIMDFUNC.
58 #define VM_DECL_vFuFvF(name, expr) \
59 VM_SIV name(fpreal32 *d, fpreal32 a, \
60 const fpreal32 *b, exint num) { \
61 if (theSIMD) name##SIMD(d, a, b, num); \
62 else for (exint n=0; n<num; n++) expr; \
64 VM_SIV name(fpreal64 *d, fpreal64 a, const fpreal64 *b, exint num) { \
65 for (exint n=0; n<num; n++) expr; \
67 VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, const fpreal32 *b, exint num))
// Declares a float operation d = f(a, b) where both `a` and `b` are scalars
// and only the destination `d` is an array of `num` elements.
//   name - name of the generated function(s)
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// Generates an fpreal32 overload (name##SIMD when `theSIMD` is true, scalar
// loop otherwise), an fpreal64 overload that always uses the scalar loop,
// and the name##SIMD signature wrapped in VM_SIMDFUNC.
69 #define VM_DECL_vFuFuF(name, expr) \
70 VM_SIV name(fpreal32 *d, fpreal32 a, \
71 fpreal32 b, exint num) { \
72 if (theSIMD) name##SIMD(d, a, b, num); \
73 else for (exint n=0; n<num; n++) expr; \
75 VM_SIV name(fpreal64 *d, fpreal64 a, \
76 fpreal64 b, exint num) { \
77 for (exint n=0; n<num; n++) expr; \
79 VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, fpreal32 b, exint num))
// Declares a unary float operation d = f(a) with vector `a`.
//   name - name of the generated function(s)
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// Generates an fpreal32 overload (name##SIMD when `theSIMD` is true, scalar
// loop otherwise), an fpreal64 overload that always uses the scalar loop,
// and the name##SIMD signature wrapped in VM_SIMDFUNC.
81 #define VM_DECL_vFvF(name, expr) \
82 VM_SIV name(fpreal32 *d, const fpreal32 *a, exint num) { \
83 if (theSIMD) name##SIMD(d, a, num); \
84 else for (exint n=0; n<num; n++) expr; \
86 VM_SIV name(fpreal64 *d, const fpreal64 *a, exint num) { \
87 for (exint n=0; n<num; n++) expr; \
89 VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, exint num))
// Declares a unary float operation d = f(a) with scalar `a`.
//   name - name of the generated function
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// Only an fpreal32 version is generated here: it calls name##SIMD when
// `theSIMD` is true and otherwise runs the scalar loop. The name##SIMD
// signature is wrapped in VM_SIMDFUNC.
91 #define VM_DECL_vFuF(name, expr) \
92 VM_SIV name(fpreal32 *d, fpreal32 a, exint num) { \
93 if (theSIMD) name##SIMD(d, a, num); \
94 else for (exint n=0; n<num; n++) expr; \
96 VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, exint num))
// Declares an operation that writes int32 results from an fpreal32 vector.
//   name - name of the generated function
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// The generated function calls name##SIMD when `theSIMD` is true and
// otherwise runs the scalar loop. The name##SIMD signature is wrapped in
// VM_SIMDFUNC.
98 #define VM_DECL_vIvF(name, expr) \
99 VM_SIV name(int32 *d, const fpreal32 *a, exint num) { \
100 if (theSIMD) name##SIMD(d, a, num); \
101 else for (exint n=0; n<num; n++) expr; \
103 VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, exint num))
// Same shape as VM_DECL_vIvF, except that the source array `a` is taken as
// a non-const fpreal32 pointer in both the wrapper and name##SIMD.
//   name - name of the generated function
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
105 #define VM_DECL_vIvF_nonconst(name, expr) \
106 VM_SIV name(int32 *d, fpreal32 *a, exint num) { \
107 if (theSIMD) name##SIMD(d, a, num); \
108 else for (exint n=0; n<num; n++) expr; \
110 VM_SIMDFUNC(static void name##SIMD(int32 *d, fpreal32 *a, exint num))
// Declares an operation that writes int32 results from two float vectors.
//   name - name of the generated function(s)
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// Generates an fpreal32-input overload (name##SIMD when `theSIMD` is true,
// scalar loop otherwise), an fpreal64-input overload that always uses the
// scalar loop, and the name##SIMD signature wrapped in VM_SIMDFUNC.
112 #define VM_DECL_vIvFvF(name, expr) \
113 VM_SIV name(int32 *d, const fpreal32 *a, const fpreal32 *b, exint num) { \
114 if (theSIMD) name##SIMD(d, a, b, num); \
115 else for (exint n=0; n<num; n++) expr; \
117 VM_SIV name(int32 *d, const fpreal64 *a, const fpreal64 *b, exint num) { \
118 for (exint n=0; n<num; n++) expr; \
120 VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, const fpreal32 *b, exint num))
// Declares an operation with an int32 destination, a non-const fpreal32
// array `a`, and a const fpreal32 array `b`.
//   name - name of the generated function
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// The generated function calls name##SIMD when `theSIMD` is true and
// otherwise runs the scalar loop. No fpreal64 overload is generated here.
122 #define VM_DECL_vIvVFvF(name, expr) \
123 VM_SIV name(int32 *d, fpreal32 *a, const fpreal32 *b, exint num) { \
124 if (theSIMD) name##SIMD(d, a, b, num); \
125 else for (exint n=0; n<num; n++) expr; \
127 VM_SIMDFUNC(static void name##SIMD(int32 *d, fpreal32 *a, const fpreal32 *b, exint num))
// Declares an operation that writes int32 results from a float vector `a`
// and a float scalar `b`.
//   name - name of the generated function(s)
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// Generates an fpreal32-input overload (name##SIMD when `theSIMD` is true,
// scalar loop otherwise), an fpreal64-input overload that always uses the
// scalar loop, and the name##SIMD signature wrapped in VM_SIMDFUNC.
129 #define VM_DECL_vIvFuF(name, expr) \
130 VM_SIV name(int32 *d, const fpreal32 *a, fpreal32 b, exint num) { \
131 if (theSIMD) name##SIMD(d, a, b, num); \
132 else for (exint n=0; n<num; n++) expr; \
134 VM_SIV name(int32 *d, const fpreal64 *a, fpreal64 b, exint num) { \
135 for (exint n=0; n<num; n++) expr; \
137 VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, fpreal32 b, exint num))
// Declares a conversion from an fpreal32 array `a` into an array of `type`,
// parameterized by three fpreal32 scalars `b`, `c` and `e`.
//   type - element type of the destination array `d`
//   name - name of the generated function
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// The generated function calls name##SIMD(d, a, b, c, e, num) when
// `theSIMD` is true and otherwise runs the scalar loop.
139 #define VM_DECL_WP(type, name, expr) \
140 VM_SIV name(type *d, const fpreal32 *a, fpreal32 b, \
141 fpreal32 c, fpreal32 e, exint num) { \
142 if (theSIMD) name##SIMD(d, a, b, c, e, num); \
143 else for (exint n=0; n<num; n++) expr; \
145 VM_SIMDFUNC(static void name##SIMD(type *d, const fpreal32 *a, fpreal32 b, fpreal32 c, fpreal32 e, exint num))
// Declares a conversion from an array of `type` into an fpreal32 array,
// parameterized by an fpreal32 scalar `b` and a `type` scalar `e`.
//   type - element type of the source array `a` (and of `e`)
//   name - name of the generated function
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// The generated function calls name##SIMD(d, a, b, e, num) when `theSIMD`
// is true and otherwise runs the scalar loop.
147 #define VM_DECL_IWP(type, name, expr) \
148 VM_SIV name(fpreal32 *d, const type *a, fpreal32 b, \
149 type e, exint num) { \
150 if (theSIMD) name##SIMD(d, a, b, e, num); \
151 else for (exint n=0; n<num; n++) expr; \
153 VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const type *a, fpreal32 b, type e, exint num))
// Declares an int32 operation d = f(a, b) with vector `a` and vector `b`.
//   name - name of the generated function
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// The generated function calls name##SIMD when `theSIMD` is true and
// otherwise runs the scalar loop.
155 #define VM_DECL_vIvIvI(name, expr) \
156 VM_SIV name(int32 *d, const int32 *a, const int32 *b, exint num) { \
157 if (theSIMD) name##SIMD(d, a, b, num); \
158 else for (exint n=0; n<num; n++) expr; \
160 VM_SIMDFUNC(static void name##SIMD(int32 *d, const int32 *a, const int32 *b, exint num))
// Declares an int32 operation d = f(a, b) with vector `a` and scalar `b`.
//   name - name of the generated function
//   expr - per-element statement, evaluated with loop index `n` in [0, num)
// The generated function calls name##SIMD when `theSIMD` is true and
// otherwise runs the scalar loop.
162 #define VM_DECL_vIvIuI(name, expr) \
163 VM_SIV name(int32 *d, const int32 *a, int32 b, exint num) { \
164 if (theSIMD) name##SIMD(d, a, b, num); \
165 else for (exint n=0; n<num; n++) expr; \
167 VM_SIMDFUNC(static void name##SIMD(int32 *d, const int32 *a, int32 b, exint num))
// Declares an operation that takes an int32 array and returns one int32.
//   name - base name of the generated functions
//   expr - statement evaluated once per element, with loop index `n`
// Generates three functions:
//   * name##SISD - scalar version that evaluates `expr` for n in [0, num);
//   * name##SIMD - signature wrapped in VM_SIMDFUNCR (value-returning);
//   * name       - returns name##SIMD(a, num) when `theSIMD` is true,
//                  otherwise name##SISD(a, num).
// NOTE(review): the lines of name##SISD that set up and return its result
// are not visible in this listing -- confirm against the full header.
169 #define VM_DECL_uIvI(name, expr) \
170 static inline int32 name##SISD(const int32 *a, exint num) { \
172 for (exint n=0; n < num; n++) expr; \
175 VM_SIMDFUNCR(static int32 name##SIMD(const int32 *a, exint num)) \
176 static inline int32 name(const int32 *a, exint num) { \
177 return theSIMD ? name##SIMD(a, num) : \
178 name##SISD(a, num); \
// Declares the full family of element-wise comparison functions for `op`.
//   name - base name; a second set is generated under the name fast##name
//   op   - binary operator applied as `a[n] op b[n]` or `a[n] op b`
// For both `name` and `fast##name` this instantiates:
//   * float inputs, vector-vector and vector-scalar (VM_DECL_vIvFvF/vIvFuF);
//   * int32 inputs, vector-vector and vector-scalar (VM_DECL_vIvIvI/vIvIuI).
// In every variant the result of the comparison is stored in d[n].
185 #define VM_DECL_CMP(name, op) \
186 VM_DECL_vIvFvF(name, d[n] = a[n] op b[n]) \
187 VM_DECL_vIvFuF(name, d[n] = a[n] op b) \
188 VM_DECL_vIvFvF(fast##name, d[n] = a[n] op b[n]) \
189 VM_DECL_vIvFuF(fast##name, d[n] = a[n] op b) \
190 VM_DECL_vIvIvI(name, d[n] = a[n] op b[n]) \
191 VM_DECL_vIvIuI(name, d[n] = a[n] op b) \
192 VM_DECL_vIvIvI(fast##name, d[n] = a[n] op b[n]) \
193 VM_DECL_vIvIuI(fast##name, d[n] = a[n] op b)
199 d = (
T)
SYSclamp(v*wpoint + offset, 0.0F, max);
206 d = (
fpreal32)(v - offset) * iwpoint;
259 #if 0 // Turn this off to use reciprocal multiplication
263 if (b == 0) set(d, 0.
f, num);
264 else div(d, a, b, num);
269 {
mul(d, a, 1/b, num); }
272 if (b == 0) set(d, 0.0F, num);
273 else mul(d, a, 1/b, num);
283 { b = 1/
b;
mul(d, a, b, num); }
297 if (theSIMD) clampSIMD(d, a, min, max, num);
298 else for (
exint n=0; n<num; n++) d[n] =
SYSclamp(a[n], min, max);
304 {
return (theSIMD) ? dotSIMD(a, b, n) : dotSISD(a, b, n); }
309 for (i = 0; i <
n; i++)
316 {
return (theSIMD) ? maddAndNormSIMD(d, a, s, n) : maddAndNormSISD(d, a, s, n); }
321 for (i = 0; i <
n; i++)
331 {
return (theSIMD) ? mulAndDotDASIMD(d, a, b, n) : mulAndDotDASISD(d, a, b, n); }
336 for (i = 0; i <
n; i++)
// Instantiate iwpoint() for fpreal32, uint8 and uint16 source arrays.
// The scalar path calls the global ::iwpoint<T> template once per element,
// passing the reciprocal 1.0F/b rather than b itself.
393 VM_DECL_IWP(fpreal32, iwpoint, ::iwpoint<fpreal32>(d[n], a[n], 1.0F/b, e));
394 VM_DECL_IWP(uint8, iwpoint, ::iwpoint<uint8>(d[n], a[n], 1.0F/b, e));
395 VM_DECL_IWP(uint16, iwpoint, ::iwpoint<uint16>(d[n], a[n], 1.0F/b, e));
400 {
for (
exint n=0; n<num; n++) d[n] = a; }
405 { set((
int32 *)d, (
const int32 *)a, num, disabled); }
410 setSIMD(d, a, num, disabled);
414 for (i = 0; i < num; i++)
415 d[i] = disabled[i] ? d[i] : a[i];
430 setSIMD(d, a, num, disabled);
434 for (i = 0; i < num; i++)
435 d[i] = disabled[i] ? d[i] : a;
444 if (theSIMD) swapSIMD(a, b, num);
445 else swapSISD<fpreal32>(
a,
b, num);
449 swapSISD<fpreal64>(
a,
b, num);
451 template <
typename T>
454 for (
exint i = 0; i < num; i++)
470 subSIMD(d, b, a, num);
471 scaleoffsetSIMD(d, t, a, num);
475 for (
exint n=0; n<num; n++)
476 d[n] = a[n] + (b[n]-a[n])*t[
n];
484 mulSIMD (d, a, 1-t, num);
485 maddSIMD(d, b, t, num);
489 for (
exint n=0; n<num; n++)
505 const uint32 *enable_flags=0);
507 const uint32 *enable_flags=0);
509 const uint32 *enable_flags=0);
511 const uint32 *enable_flags=0);
514 const uint32 *enable_flags=0);
516 const uint32 *enable_flags=0);
518 const uint32 *enable_flags=0);
520 const uint32 *enable_flags=0);
524 const uint32 *enable_flags=0);
526 const uint32 *enable_flags=0);
528 const uint32 *enable_flags=0);
GA_API const UT_StringHolder div
#define VM_DECL_IWP(type, name, expr)
#define VM_DECL_vFuFuF(name, expr)
IMATH_HOSTDEVICE constexpr int floor(T x) IMATH_NOEXCEPT
#define VM_DECL_uIvI(name, expr)
#define VM_SIMDFUNC(signature)
virtual bool lerp(GA_AttributeOperand &d, GA_AttributeOperand &a, GA_AttributeOperand &b, GA_AttributeOperand &t) const
d = SYSlerp(a, b, t);
vfloat4 sqrt(const vfloat4 &a)
GLboolean GLboolean GLboolean GLboolean a
void swap(T &lhs, T &rhs)
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
UT_Matrix2T< T > SYSlerp(const UT_Matrix2T< T > &v1, const UT_Matrix2T< T > &v2, S t)
GLfloat GLfloat GLfloat GLfloat v3
VM_SIV swapSISD(T *a, T *b, exint num)
VM_SIV set(int32 *d, int32 a, exint num, const uint32 *disabled)
#define VM_DECL_vIvIuI(name, expr)
#define VM_DECL_vFvFvF(name, expr)
ImageBuf OIIO_API sub(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
SYS_FORCE_INLINE const X * cast(const InstancablePtr *o)
#define VM_DECL_vIvVFvF(name, expr)
#define VM_DECL_CMP(name, op)
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
VM_SIV lerp(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, fpreal32 t, exint num)
UT_Vector3T< T > SYSclamp(const UT_Vector3T< T > &v, const UT_Vector3T< T > &min, const UT_Vector3T< T > &max)
#define VM_DECL_vFvFuF(name, expr)
#define VM_DECL_vFvF(name, expr)
OIIO_FORCEINLINE OIIO_HOSTDEVICE float madd(float a, float b, float c)
Fused multiply and add: (a*b + c)
GLboolean GLboolean GLboolean b
#define VM_DECL_vIvF_nonconst(name, expr)
#define VM_SIMDFUNCR(signature)
VM_SIV set(fpreal32 *d, const fpreal32 *a, exint num, const uint32 *disabled)
VM_Math::set(d, a, disabled) := d[i] = disabled[i] ? d[i] : a[i].
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
static void forceSIMD(bool onoff)
IMATH_INTERNAL_NAMESPACE_HEADER_ENTER IMATH_HOSTDEVICE constexpr T abs(T a) IMATH_NOEXCEPT
ImageBuf OIIO_API add(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
#define VM_DECL_vFuF(name, expr)
#define VM_DECL_vIvIvI(name, expr)
fpreal32 SYSfloorIL(fpreal32 val)
#define VM_DECL_vIvF(name, expr)
VM_SIV set(int32 *d, const int32 *a, exint num, const uint32 *disabled)
VM_SIV swap(fpreal64 *a, fpreal64 *b, exint num)
#define VM_DECL_WP(type, name, expr)
#define VM_DECL_vFuFvF(name, expr)
ImageBuf OIIO_API mul(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)