HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
VM_Math.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: VM_Math.h ( VM Library, C++)
7  *
8  * COMMENTS: Vector Math.
9  */
10 
11 #ifndef __VM_Math__
12 #define __VM_Math__
13 
14 #include "VM_API.h"
15 #include <SYS/SYS_Math.h>
16 #include <SYS/SYS_Floor.h>
17 #include <string.h>
18 #include "VM_SIMD.h"
19 
20 
// VM_SIV: common prefix ("static inline void") for the vector-math helpers
// declared throughout this header.
21 #define VM_SIV static inline void
22 
23 // On platforms that don't support vector instructions, we define empty
24 // function signatures for SIMD operations. The runtime check on theSIMD
25 // should prevent them from being called.
26 #if defined(CPU_HAS_SIMD_INSTR)
// SIMD available: emit declarations only; the definitions live out-of-line.
27 #define VM_SIMDFUNC(signature) signature;
28 #define VM_SIMDFUNCR(signature) signature;
29 #else
// No SIMD: emit do-nothing inline stubs (VM_SIMDFUNCR returns 0) so the
// call sites still compile and link.
30 #define VM_SIMDFUNC(signature) signature {}
31 #define VM_SIMDFUNCR(signature) signature { return 0; }
32 #endif
33 
// Macro-name mnemonics: v = vector (array) argument, u = uniform (scalar)
// argument, F = fpreal, I = int32. E.g. vFvFuF declares d[] computed from
// array a[] and scalar b. Each macro emits an fpreal32 overload that
// dispatches to a SIMD kernel when theSIMD is set, a scalar-only fpreal64
// overload, and the fpreal32 SIMD kernel's signature via VM_SIMDFUNC.
// NOTE: comments cannot appear inside the macros (backslash continuations).
34 #define VM_DECL_vFvFvF(name, expr) \
35  VM_SIV name(fpreal32 *d, const fpreal32 *a, \
36  const fpreal32 *b, exint num) { \
37  if (theSIMD) name##SIMD(d, a, b, num); \
38  else for (exint n=0; n<num; n++) expr; \
39  } \
40  VM_SIV name(fpreal64 *d, const fpreal64 *a, \
41  const fpreal64 *b, exint num) { \
42  for (exint n=0; n<num; n++) expr; \
43  } \
44  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, exint num))
45 
// d[] from array a[] and uniform scalar b.
46 #define VM_DECL_vFvFuF(name, expr) \
47  VM_SIV name(fpreal32 *d, const fpreal32 *a, \
48  fpreal32 b, exint num) { \
49  if (theSIMD) name##SIMD(d, a, b, num); \
50  else for (exint n=0; n<num; n++) expr; \
51  } \
52  VM_SIV name(fpreal64 *d, const fpreal64 *a, \
53  fpreal64 b, exint num) { \
54  for (exint n=0; n<num; n++) expr; \
55  } \
56  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num))
57 
// d[] from uniform scalar a and array b[].
58 #define VM_DECL_vFuFvF(name, expr) \
59  VM_SIV name(fpreal32 *d, fpreal32 a, \
60  const fpreal32 *b, exint num) { \
61  if (theSIMD) name##SIMD(d, a, b, num); \
62  else for (exint n=0; n<num; n++) expr; \
63  } \
64  VM_SIV name(fpreal64 *d, fpreal64 a, const fpreal64 *b, exint num) { \
65  for (exint n=0; n<num; n++) expr; \
66  } \
67  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, const fpreal32 *b, exint num))
68 
// d[] from two uniform scalars (expr typically also reads d[n], e.g.
// scaleoffset below).
69 #define VM_DECL_vFuFuF(name, expr) \
70  VM_SIV name(fpreal32 *d, fpreal32 a, \
71  fpreal32 b, exint num) { \
72  if (theSIMD) name##SIMD(d, a, b, num); \
73  else for (exint n=0; n<num; n++) expr; \
74  } \
75  VM_SIV name(fpreal64 *d, fpreal64 a, \
76  fpreal64 b, exint num) { \
77  for (exint n=0; n<num; n++) expr; \
78  } \
79  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, fpreal32 b, exint num))
80 
// Unary: d[] from array a[].
81 #define VM_DECL_vFvF(name, expr) \
82  VM_SIV name(fpreal32 *d, const fpreal32 *a, exint num) { \
83  if (theSIMD) name##SIMD(d, a, num); \
84  else for (exint n=0; n<num; n++) expr; \
85  } \
86  VM_SIV name(fpreal64 *d, const fpreal64 *a, exint num) { \
87  for (exint n=0; n<num; n++) expr; \
88  } \
89  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, exint num))
90 
// Fill-style: d[] from a uniform scalar. fpreal32 only — no fpreal64
// overload is declared by this macro.
91 #define VM_DECL_vFuF(name, expr) \
92  VM_SIV name(fpreal32 *d, fpreal32 a, exint num) { \
93  if (theSIMD) name##SIMD(d, a, num); \
94  else for (exint n=0; n<num; n++) expr; \
95  } \
96  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, exint num))
97 
// Integer results from float inputs: d[] (int32) from array a[] (fpreal32).
98 #define VM_DECL_vIvF(name, expr) \
99  VM_SIV name(int32 *d, const fpreal32 *a, exint num) { \
100  if (theSIMD) name##SIMD(d, a, num); \
101  else for (exint n=0; n<num; n++) expr; \
102  } \
103  VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, exint num))
104 
// As above but the source array is non-const, so expr may modify a[n]
// (used by splitFloat below).
105 #define VM_DECL_vIvF_nonconst(name, expr) \
106  VM_SIV name(int32 *d, fpreal32 *a, exint num) { \
107  if (theSIMD) name##SIMD(d, a, num); \
108  else for (exint n=0; n<num; n++) expr; \
109  } \
110  VM_SIMDFUNC(static void name##SIMD(int32 *d, fpreal32 *a, exint num))
111 
// d[] (int32) from float arrays a[] and b[]; the fpreal64 overload has no
// SIMD dispatch.
112 #define VM_DECL_vIvFvF(name, expr) \
113  VM_SIV name(int32 *d, const fpreal32 *a, const fpreal32 *b, exint num) { \
114  if (theSIMD) name##SIMD(d, a, b, num); \
115  else for (exint n=0; n<num; n++) expr; \
116  } \
117  VM_SIV name(int32 *d, const fpreal64 *a, const fpreal64 *b, exint num) { \
118  for (exint n=0; n<num; n++) expr; \
119  } \
120  VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, const fpreal32 *b, exint num))
121 
// d[] (int32) from writable array a[] and const array b[].
122 #define VM_DECL_vIvVFvF(name, expr) \
123  VM_SIV name(int32 *d, fpreal32 *a, const fpreal32 *b, exint num) { \
124  if (theSIMD) name##SIMD(d, a, b, num); \
125  else for (exint n=0; n<num; n++) expr; \
126  } \
127  VM_SIMDFUNC(static void name##SIMD(int32 *d, fpreal32 *a, const fpreal32 *b, exint num))
128 
// d[] (int32) from float array a[] and uniform scalar b.
129 #define VM_DECL_vIvFuF(name, expr) \
130  VM_SIV name(int32 *d, const fpreal32 *a, fpreal32 b, exint num) { \
131  if (theSIMD) name##SIMD(d, a, b, num); \
132  else for (exint n=0; n<num; n++) expr; \
133  } \
134  VM_SIV name(int32 *d, const fpreal64 *a, fpreal64 b, exint num) { \
135  for (exint n=0; n<num; n++) expr; \
136  } \
137  VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, fpreal32 b, exint num))
138 
// White-point quantization: d[] (of 'type') from float a[] with scale b,
// clamp maximum c, and offset e — expr invokes the ::wpoint<T>() helper.
139 #define VM_DECL_WP(type, name, expr) \
140  VM_SIV name(type *d, const fpreal32 *a, fpreal32 b, \
141  fpreal32 c, fpreal32 e, exint num) { \
142  if (theSIMD) name##SIMD(d, a, b, c, e, num); \
143  else for (exint n=0; n<num; n++) expr; \
144  } \
145  VM_SIMDFUNC(static void name##SIMD(type *d, const fpreal32 *a, fpreal32 b, fpreal32 c, fpreal32 e, exint num))
146 
// Inverse white-point conversion: float d[] recovered from 'type' a[] with
// scale b and offset e — expr invokes the ::iwpoint<T>() helper.
147 #define VM_DECL_IWP(type, name, expr) \
148  VM_SIV name(fpreal32 *d, const type *a, fpreal32 b, \
149  type e, exint num) { \
150  if (theSIMD) name##SIMD(d, a, b, e, num); \
151  else for (exint n=0; n<num; n++) expr; \
152  } \
153  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const type *a, fpreal32 b, type e, exint num))
154 
// Integer binary op: d[] from int32 arrays a[] and b[].
155 #define VM_DECL_vIvIvI(name, expr) \
156  VM_SIV name(int32 *d, const int32 *a, const int32 *b, exint num) { \
157  if (theSIMD) name##SIMD(d, a, b, num); \
158  else for (exint n=0; n<num; n++) expr; \
159  } \
160  VM_SIMDFUNC(static void name##SIMD(int32 *d, const int32 *a, const int32 *b, exint num))
161 
// Integer binary op: d[] from int32 array a[] and uniform int32 b.
162 #define VM_DECL_vIvIuI(name, expr) \
163  VM_SIV name(int32 *d, const int32 *a, int32 b, exint num) { \
164  if (theSIMD) name##SIMD(d, a, b, num); \
165  else for (exint n=0; n<num; n++) expr; \
166  } \
167  VM_SIMDFUNC(static void name##SIMD(int32 *d, const int32 *a, int32 b, exint num))
168 
// Integer reduction: expr accumulates over a[] into local 'd', which the
// generated name() returns; dispatches to the SIMD kernel when available.
169 #define VM_DECL_uIvI(name, expr) \
170  static inline int32 name##SISD(const int32 *a, exint num) { \
171  int32 d = 0; \
172  for (exint n=0; n < num; n++) expr; \
173  return d; \
174  } \
175  VM_SIMDFUNCR(static int32 name##SIMD(const int32 *a, exint num)) \
176  static inline int32 name(const int32 *a, exint num) { \
177  return theSIMD ? name##SIMD(a, num) : \
178  name##SISD(a, num); \
179  } \
180 
181 // Declare the 8 variations of a single comparison operator
182 // - vector and scalar b
183 // - fast and standard
184 // - int32 and fpreal32
// (Per the class docs below: the 'fast' forms may leave any non-zero value
// for true, while the standard forms always store 0 or 1.)
185 #define VM_DECL_CMP(name, op) \
186  VM_DECL_vIvFvF(name, d[n] = a[n] op b[n]) \
187  VM_DECL_vIvFuF(name, d[n] = a[n] op b) \
188  VM_DECL_vIvFvF(fast##name, d[n] = a[n] op b[n]) \
189  VM_DECL_vIvFuF(fast##name, d[n] = a[n] op b) \
190  VM_DECL_vIvIvI(name, d[n] = a[n] op b[n]) \
191  VM_DECL_vIvIuI(name, d[n] = a[n] op b) \
192  VM_DECL_vIvIvI(fast##name, d[n] = a[n] op b[n]) \
193  VM_DECL_vIvIuI(fast##name, d[n] = a[n] op b)
194 
// Scale v by the white point, add offset, clamp to [0, max], and cast to T.
// Element helper used by the VM_DECL_WP-generated wpoint() array methods.
195 template <class T>
196 static inline void
197 wpoint(T &d, fpreal32 v, fpreal32 wpoint, fpreal32 max, fpreal32 offset)
198 {
199  d = (T)SYSclamp(v*wpoint + offset, 0.0F, max);
200 }
201 
// Inverse of wpoint(): subtract offset and multiply by the reciprocal white
// point (callers pass 1/b) to recover a float from quantized type T.
// Element helper used by the VM_DECL_IWP-generated iwpoint() array methods.
201 template <class T>
202 static inline void
203 iwpoint(fpreal32 &d, T v, fpreal32 iwpoint, T offset)
204 {
205  d = (fpreal32)(v - offset) * iwpoint;
206 }
208 
208 
209 class VM_API VM_Math {
210 public:
211  static bool isSIMD() { return theSIMD; }
212 
213  /// The fast operations assume that any non-zero return value is
214  /// interpreted as true. Standard operations always set the result to
215  /// either 0 or 1.
216  ///
217  /// VM_Math::lt(d, a, b) := d[i] = a[i] < b[i]
218  /// VM_Math::lt(d, a, b) := d[i] = a[i] < b
219  /// VM_Math::fastlt(d, a, b) := d[i] = a[i] < b[i]
220  /// VM_Math::fastlt(d, a, b) := d[i] = a[i] < b
221  VM_DECL_CMP(lt, <)
222  VM_DECL_CMP(le, <=)
223  VM_DECL_CMP(gt, >)
224  VM_DECL_CMP(ge, >=)
225  VM_DECL_CMP(eq, ==)
226  VM_DECL_CMP(ne, !=)
227 
228  /// Bitwise operations
229  VM_DECL_vIvIvI(bor, d[n] = a[n] | b[n])
230  VM_DECL_vIvIuI(bor, d[n] = a[n] | b)
231  VM_DECL_vIvIvI(band, d[n] = a[n] & b[n])
232  VM_DECL_vIvIuI(band, d[n] = a[n] & b)
233  VM_DECL_vIvIvI(bxor, d[n] = a[n] ^ b[n])
234  VM_DECL_vIvIuI(bxor, d[n] = a[n] ^ b)
235 
236  /// VM_Math::add(d, a, b) := d[i] = a[i] + b[i]
237  VM_DECL_vFvFvF(add, d[n] = a[n] + b[n])
238  VM_DECL_vFvFuF(add, d[n] = a[n] + b)
239  VM_DECL_vIvIvI(add, d[n] = a[n] + b[n])
240  VM_DECL_vIvIuI(add, d[n] = a[n] + b)
241 
242  /// VM_Math::sub(d, a, b) := d[i] = a[i] - b[i]
243  VM_DECL_vFvFvF(sub, d[n] = a[n] - b[n])
244  VM_DECL_vFvFuF(sub, d[n] = a[n] - b)
245  VM_DECL_vFuFvF(sub, d[n] = a - b[n])
246 
247  /// VM_Math::mul(d, a, b) := d[i] = a[i] * b[i]
248  VM_DECL_vFvFvF(mul, d[n] = a[n] * b[n])
249  VM_DECL_vFvFuF(mul, d[n] = a[n] * b)
250 
251  /// VM_Math::div(d, a, b) := d[i] = a[i] / b[i]
252  VM_DECL_vFvFvF(div, d[n] = a[n] / b[n])
253  VM_DECL_vFuFvF(div, d[n] = a / b[n])
254 
255  /// VM_Math::safediv(d, a, b) := d[i] = b[i] != 0 ? a[i] / b[i] : a[i]
256  VM_DECL_vFvFvF(safediv, d[n] = SYSsafediv(a[n], b[n]))
257  VM_DECL_vFuFvF(safediv, d[n] = SYSsafediv(a, b[n]))
258 
259 #if 0 // Turn this off to use reciprocal multiplication
260  VM_DECL_vFvFuF(div, d[n] = a[n] / b);
261  VM_SIV safediv(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num)
262  {
263  if (b == 0) set(d, 0.f, num);
264  else div(d, a, b, num);
265  }
266 #else
267  // Multiply by reciprocal rather than dividing by a constant
268  VM_SIV div(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num)
269  { mul(d, a, 1/b, num); }
270  VM_SIV safediv(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num)
271  {
272  if (b == 0) set(d, 0.0F, num);
273  else mul(d, a, 1/b, num);
274  }
275 #endif
276 
277  /// VM_Math::fdiv(d, a, b) := d[i] = a[i] * (1.0 / b[i])
278  /// A faster version than div(), but less accurate since it uses the
279  /// reciprocal.
280  VM_DECL_vFvFvF(fdiv, d[n] = a[n] / b[n])
281  VM_DECL_vFuFvF(fdiv, d[n] = a / b[n])
282  VM_SIV fdiv(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num)
283  { b = 1/b; mul(d, a, b, num); }
284 
285  /// VM_Math::vmmax(d, a, b) := d[i] = SYSmax(a[i], b[i])
286  VM_DECL_vFvFvF(vmmax, d[n] = SYSmax(a[n], b[n]) )
287  VM_DECL_vFvFuF(vmmax, d[n] = SYSmax(a[n], b) )
288 
289  /// VM_Math::vmmin(d, a, b) := d[i] = SYSmin(a[i], b[i])
290  VM_DECL_vFvFvF(vmmin, d[n] = SYSmin(a[n], b[n]) )
291  VM_DECL_vFvFuF(vmmin, d[n] = SYSmin(a[n], b) )
292 
293  /// VM_Math::clamp(d, a, b) := d[i] = SYSclamp(a[i], min, max)
294  VM_SIV clamp(fpreal32 *d, const fpreal32 *a,
295  fpreal32 min, fpreal32 max, exint num)
296  {
297  if (theSIMD) clampSIMD(d, a, min, max, num);
298  else for (exint n=0; n<num; n++) d[n] = SYSclamp(a[n], min, max);
299  }
300  VM_SIMDFUNC(static void clampSIMD(fpreal32 *d, const fpreal32 *a, fpreal32 min, fpreal32 max, exint num))
301 
302  /// VM_Math::dot(a,b,n) := return sum(a[i]*b[i], i=0,n)
303  static inline fpreal64 dot(const fpreal32 *a, const fpreal32 *b, exint n)
304  { return (theSIMD) ? dotSIMD(a, b, n) : dotSISD(a, b, n); }
305  static inline double dotSISD(const fpreal32 *a, const fpreal32 *b, exint n)
306  {
307  exint i;
308  double sum = 0;
309  for (i = 0; i < n; i++)
310  sum += a[i]*b[i];
311  return sum;
312  }
313  VM_SIMDFUNCR(static double dotSIMD(const fpreal32 *a, const fpreal32 *b, exint n))
314 
315  static inline double maddAndNorm(fpreal32 *d, const fpreal32 *a, fpreal s, exint n)
316  { return (theSIMD) ? maddAndNormSIMD(d, a, s, n) : maddAndNormSISD(d, a, s, n); }
317  static inline double maddAndNormSISD(fpreal32 *d, const fpreal32 *a, fpreal s, exint n)
318  {
319  exint i;
320  double sum = 0;
321  for (i = 0; i < n; i++)
322  {
323  d[i] += a[i] * s;
324  sum += d[i] * d[i];
325  }
326  return sum;
327  }
328  VM_SIMDFUNCR(static double maddAndNormSIMD(fpreal32 *d, const fpreal32 *a, fpreal32 s, exint n))
329 
330  static inline double mulAndDotDA(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, exint n)
331  { return (theSIMD) ? mulAndDotDASIMD(d, a, b, n) : mulAndDotDASISD(d, a, b, n); }
332  static inline double mulAndDotDASISD(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, exint n)
333  {
334  exint i;
335  double sum = 0;
336  for (i = 0; i < n; i++)
337  {
338  d[i] = a[i] * b[i];
339  sum += d[i] * a[i];
340  }
341  return sum;
342  }
343  VM_SIMDFUNCR(static double mulAndDotDASIMD(fpreal32 *d, const fpreal32 *a, const fpreal32 *s, exint n))
344 
345  /// VM_Math::zerocount(a,n) := return sum(a[i]==0, i=0,n)
346  VM_DECL_uIvI(zerocount, d += a[n] == 0)
347 
348  /// VM_Math::scaleoffset(d, a, b) := d[i] = d[i]*a[i] + b[i]
349  VM_DECL_vFvFvF(scaleoffset, d[n] = d[n]*a[n] + b[n])
350  VM_DECL_vFvFuF(scaleoffset, d[n] = d[n]*a[n] + b)
351  VM_DECL_vFuFvF(scaleoffset, d[n] = d[n]*a + b[n])
352  VM_DECL_vFuFuF(scaleoffset, d[n] = d[n]*a + b)
353 
354  /// VM_Math::madd(d, a, b) := d[i] = d[i] + a[i]*b[i]
355  VM_DECL_vFvFvF(madd, d[n] += a[n]*b[n])
356  VM_DECL_vFvFuF(madd, d[n] += a[n]*b)
357 
358  /// VM_Math::sqrt(d, a) := d[i] = sqrt(a[i]);
359  VM_DECL_vFvF(sqrt, d[n] = SYSsqrt(a[n]) )
360 
361  /// VM_Math::fsqrt(d, a) := d[i] = 1.0 / isqrt(a[i]);
362  /// This is a faster, but far less accurate version of sqrt() since it uses
363  /// the reciprocal sqrt().
364  VM_DECL_vFvF(fsqrt, d[n] = SYSsqrt(a[n]) )
365 
366  /// VM_Math::isqrt(d, a) := d[i] = 1.0 / sqrt(a[i])
367  VM_DECL_vFvF(isqrt, d[n] = 1/SYSsqrt(a[n]) )
368 
369  /// VM_Math::floor(a) := SYSfloorIL(a)
370  VM_DECL_vIvF(floor, d[n] = (int)SYSfloorIL(a[n]) )
371 
372  VM_DECL_vIvF_nonconst(splitFloat, SYSfastSplitFloat(a[n], d[n]) )
373  VM_DECL_vIvVFvF(splitFloat, { a[n] = b[n]; SYSfastSplitFloat(a[n], d[n]); } )
374 
375  /// VM_Math::cast(a) := (int)a
376  VM_DECL_vIvF(cast, d[n] = (int)a[n] )
377 
378  /// VM_Math::negate(d, a) := d[i] = -a[i]
379  VM_DECL_vFvF(negate, d[n] = -a[n] )
380 
381  /// VM_Math::invert(d, a) := d[i] = 1.0 / a[i]
382  VM_DECL_vFvF(invert, d[n] = 1.0 / a[n] )
383 
384  /// VM_Math::abs(d, a) := d[i] = abs(a[i])
385  VM_DECL_vFvF(abs, d[n] = SYSabs(a[n]) )
386 
387  /// VM_Math::wpoint(d,a,b,c,e) := d[i] = SYSclamp(a[i]*b+e+0.5F, 0, c)
388  VM_DECL_WP(fpreal32, wpoint, ::wpoint<fpreal32>(d[n], a[n], b, c, e+0.5F));
389  VM_DECL_WP(uint8, wpoint, ::wpoint<uint8>(d[n], a[n], b, c, e+0.5F));
390  VM_DECL_WP(uint16, wpoint, ::wpoint<uint16>(d[n], a[n], b, c, e+0.5F));
391 
392  /// VM_Math::iwpoint(d,a,b,e) := d[i] = (fpreal32)(a[i]-e)/b;
393  VM_DECL_IWP(fpreal32, iwpoint, ::iwpoint<fpreal32>(d[n], a[n], 1.0F/b, e));
394  VM_DECL_IWP(uint8, iwpoint, ::iwpoint<uint8>(d[n], a[n], 1.0F/b, e));
395  VM_DECL_IWP(uint16, iwpoint, ::iwpoint<uint16>(d[n], a[n], 1.0F/b, e));
396 
397  VM_DECL_vFuF(set, d[n] = a )
398  /// VM_Math::set(d, a) := d[i] = a
399  VM_SIV set(int32 *d, int a, exint num)
400  { for (exint n=0; n<num; n++) d[n] = a; }
401 
402  /// VM_Math::set(d, a, disabled) := d[i] = disabled[i] ? d[i] : a[i]
403  VM_SIV set(fpreal32 *d, const fpreal32 *a, exint num,
404  const uint32 *disabled)
405  { set((int32 *)d, (const int32 *)a, num, disabled); }
406  VM_SIV set(int32 *d, const int32 *a, exint num,
407  const uint32 *disabled)
408  {
409  if (theSIMD)
410  setSIMD(d, a, num, disabled);
411  else
412  {
413  exint i;
414  for (i = 0; i < num; i++)
415  d[i] = disabled[i] ? d[i] : a[i];
416  }
417  }
418  VM_SIMDFUNC(static void setSIMD(int32 *d, const int32 *a, exint num, const uint32 *disabled))
419  VM_SIV set(fpreal32 *d, fpreal32 a, exint num,
420  const uint32 *disabled)
421  {
422  SYS_FPRealUnionF fu;
423  fu.fval = a;
424  set((int32 *)d, fu.ival, num, disabled);
425  }
426  VM_SIV set(int32 *d, int32 a, exint num,
427  const uint32 *disabled)
428  {
429  if (theSIMD)
430  setSIMD(d, a, num, disabled);
431  else
432  {
433  exint i;
434  for (i = 0; i < num; i++)
435  d[i] = disabled[i] ? d[i] : a;
436  }
437  }
438  VM_SIMDFUNC(static void setSIMD(int32 *d, int32 a, exint num,
439  const uint32 *disabled))
440 
441 
442  VM_SIV swap(fpreal32 *a, fpreal32 *b, exint num)
443  {
444  if (theSIMD) swapSIMD(a, b, num);
445  else swapSISD<fpreal32>(a, b, num);
446  }
447  VM_SIV swap(fpreal64 *a, fpreal64 *b, exint num)
448  {
449  swapSISD<fpreal64>(a, b, num);
450  }
451  template <typename T>
452  VM_SIV swapSISD(T *a, T *b, exint num)
453  {
454  for (exint i = 0; i < num; i++)
455  {
456  T t = a[i];
457  a[i] = b[i];
458  b[i] = t;
459  }
460  }
461  VM_SIMDFUNC(static void swapSIMD(fpreal32 *a, fpreal32 *b, exint num))
462 
463 
464  /// VM_Math::lerp(d, a, b, t) := d[i] = a[i] + (b[i]-a[i])*t[i]
465  VM_SIV lerp(fpreal32 *d, const fpreal32 *a, const fpreal32 *b,
466  const fpreal32 *t, exint num)
467  {
468  if (theSIMD)
469  {
470  subSIMD(d, b, a, num);
471  scaleoffsetSIMD(d, t, a, num);
472  }
473  else
474  {
475  for (exint n=0; n<num; n++)
476  d[n] = a[n] + (b[n]-a[n])*t[n];
477  }
478  }
479  VM_SIV lerp(fpreal32 *d, const fpreal32 *a, const fpreal32 *b,
480  fpreal32 t, exint num)
481  {
482  if (theSIMD)
483  {
484  mulSIMD (d, a, 1-t, num);
485  maddSIMD(d, b, t, num);
486  }
487  else
488  {
489  for (exint n=0; n<num; n++)
490  d[n] = SYSlerp(a[n], b[n], t);
491  }
492  }
493 
494  /// Vector Functions.
495  /// The following methods assume that the values coming in are "vectors".
496  /// The mnemonics are:
497  /// 3 - Vector3 or Matrix3
498  /// 4 - Vector4 or Matrix4
499  /// The enable_flags are an array corresponding 1-1 to the vectors to be
500  /// processed. Unlike typical flags, the vector will be processed
501  /// if the flag is set to 0. This is to match the VEX style calling.
502  /// If the VEX processor mask flag changes, the mask type here should
503  /// change too.
504  static void mulRowVec44(fpreal32 *v4, const fpreal32 m1[4][4], exint nv,
505  const uint32 *enable_flags=0);
506  static void mulRowVec34(fpreal32 *v3, const fpreal32 m1[4][4], exint nv,
507  const uint32 *enable_flags=0);
508  static void mulColVec44(fpreal32 *v4, const fpreal32 m1[4][4], exint nv,
509  const uint32 *enable_flags=0);
510  static void mulColVec34(fpreal32 *v3, const fpreal32 m1[4][4], exint nv,
511  const uint32 *enable_flags=0);
512  /// Multiplication, but treating the matrix as a 3x3 (i.e. no translate)
513  static void mulRowVec44_3(fpreal32 *v4, const fpreal32 m1[4][4], exint nv,
514  const uint32 *enable_flags=0);
515  static void mulRowVec34_3(fpreal32 *v3, const fpreal32 m1[4][4], exint nv,
516  const uint32 *enable_flags=0);
517  static void mulColVec44_3(fpreal32 *v4, const fpreal32 m1[4][4], exint nv,
518  const uint32 *enable_flags=0);
519  static void mulColVec34_3(fpreal32 *v3, const fpreal32 m1[4][4], exint nv,
520  const uint32 *enable_flags=0);
521 
522  // Add a 4-tuple to an array of floats assumed to be a 4-tuple
523  static void vadd4u4(fpreal32 *v4, const fpreal32 a[4], exint nv,
524  const uint32 *enable_flags=0);
525  static void vsub4u4(fpreal32 *v4, const fpreal32 a[4], exint nv,
526  const uint32 *enable_flags=0);
527  static void vmul4u4(fpreal32 *v4, const fpreal32 a[4], exint nv,
528  const uint32 *enable_flags=0);
529 
530  static void forceSIMD(bool onoff) { theSIMD = onoff; }
531 
532 private:
533  static bool theSIMD;
534 };
535 
536 #endif
#define SYSmax(a, b)
Definition: SYS_Math.h:1570
GA_API const UT_StringHolder div
#define VM_DECL_IWP(type, name, expr)
Definition: VM_Math.h:147
#define VM_DECL_vFuFuF(name, expr)
Definition: VM_Math.h:69
unsigned short uint16
Definition: SYS_Types.h:38
int int32
Definition: SYS_Types.h:39
GLenum clamp
Definition: glcorearb.h:1234
IMATH_HOSTDEVICE constexpr int floor(T x) IMATH_NOEXCEPT
Definition: ImathFun.h:112
GLboolean invert
Definition: glcorearb.h:549
const GLdouble * v
Definition: glcorearb.h:837
#define VM_DECL_uIvI(name, expr)
Definition: VM_Math.h:169
#define VM_SIMDFUNC(signature)
Definition: VM_Math.h:30
virtual bool lerp(GA_AttributeOperand &d, GA_AttributeOperand &a, GA_AttributeOperand &b, GA_AttributeOperand &t) const
d = SYSlerp(a, b, t);
vfloat4 sqrt(const vfloat4 &a)
Definition: simd.h:7481
#define VM_API
Definition: VM_API.h:10
int64 exint
Definition: SYS_Types.h:125
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
GLdouble s
Definition: glad.h:3009
void swap(T &lhs, T &rhs)
Definition: pugixml.cpp:7172
#define SYSabs(a)
Definition: SYS_Math.h:1572
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
UT_Matrix2T< T > SYSlerp(const UT_Matrix2T< T > &v1, const UT_Matrix2T< T > &v2, S t)
Definition: UT_Matrix2.h:675
GLfloat GLfloat GLfloat GLfloat v3
Definition: glcorearb.h:819
float fpreal32
Definition: SYS_Types.h:200
VM_SIV swapSISD(T *a, T *b, exint num)
Definition: VM_Math.h:452
VM_SIV set(int32 *d, int32 a, exint num, const uint32 *disabled)
Definition: VM_Math.h:426
double fpreal64
Definition: SYS_Types.h:201
unsigned char uint8
Definition: SYS_Types.h:36
#define VM_DECL_vIvIuI(name, expr)
Definition: VM_Math.h:162
GLdouble n
Definition: glcorearb.h:2008
#define VM_DECL_vFvFvF(name, expr)
Definition: VM_Math.h:34
GLfloat f
Definition: glcorearb.h:1926
GLintptr offset
Definition: glcorearb.h:665
ImageBuf OIIO_API sub(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
SYS_FORCE_INLINE const X * cast(const InstancablePtr *o)
#define VM_DECL_vIvVFvF(name, expr)
Definition: VM_Math.h:122
#define VM_DECL_CMP(name, op)
Definition: VM_Math.h:185
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:140
VM_SIV lerp(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, fpreal32 t, exint num)
Definition: VM_Math.h:479
UT_Vector3T< T > SYSclamp(const UT_Vector3T< T > &v, const UT_Vector3T< T > &min, const UT_Vector3T< T > &max)
Definition: UT_Vector3.h:1057
#define VM_DECL_vFvFuF(name, expr)
Definition: VM_Math.h:46
#define VM_DECL_vFvF(name, expr)
Definition: VM_Math.h:81
OIIO_FORCEINLINE OIIO_HOSTDEVICE float madd(float a, float b, float c)
Fused multiply and add: (a*b + c)
Definition: fmath.h:413
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
#define VM_DECL_vIvF_nonconst(name, expr)
Definition: VM_Math.h:105
GLdouble t
Definition: glad.h:2397
#define VM_SIMDFUNCR(signature)
Definition: VM_Math.h:31
fpreal64 fpreal
Definition: SYS_Types.h:277
VM_SIV set(fpreal32 *d, const fpreal32 *a, exint num, const uint32 *disabled)
VM_Math::set(d, a, disabled) := d[i] = disabled[i] ? d[i] : a[i].
Definition: VM_Math.h:403
static bool isSIMD()
Definition: VM_Math.h:211
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
unsigned int uint32
Definition: SYS_Types.h:40
static void forceSIMD(bool onoff)
Definition: VM_Math.h:530
#define VM_SIV
Definition: VM_Math.h:21
IMATH_INTERNAL_NAMESPACE_HEADER_ENTER IMATH_HOSTDEVICE constexpr T abs(T a) IMATH_NOEXCEPT
Definition: ImathFun.h:26
ImageBuf OIIO_API add(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
#define VM_DECL_vFuF(name, expr)
Definition: VM_Math.h:91
#define VM_DECL_vIvIvI(name, expr)
Definition: VM_Math.h:155
#define SYSmin(a, b)
Definition: SYS_Math.h:1571
fpreal32 SYSfloorIL(fpreal32 val)
Definition: SYS_Floor.h:59
#define VM_DECL_vIvF(name, expr)
Definition: VM_Math.h:98
VM_SIV set(int32 *d, const int32 *a, exint num, const uint32 *disabled)
Definition: VM_Math.h:406
VM_SIV swap(fpreal64 *a, fpreal64 *b, exint num)
Definition: VM_Math.h:447
#define VM_DECL_WP(type, name, expr)
Definition: VM_Math.h:139
#define VM_DECL_vFuFvF(name, expr)
Definition: VM_Math.h:58
ImageBuf OIIO_API mul(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)