// ~UT_VectorT(): release owned storage.  myVector is biased by myNL so
// indexing runs [myNL..myNH]; un-bias it before freeing.
if (myOwnData && myVector)
    free(myVector + myNL);

// subvector(): steal storage from another vector; the resulting vector
// has its origin at index 1.
if (myOwnData && myVector)
    free(myVector + myNL);
// ...
myVector = &v.myVector[nl] - 1;
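// --- Illustrative sketch (not part of the original file) ---
// The "biased pointer" idiom used throughout this file: storing
// (allocation - nl) lets callers index the vector directly with
// [nl..nh] instead of [0..n-1].  A minimal standalone version:
//
//     double *alloc = (double *)malloc((nh - nl + 1) * sizeof(double));
//     double *vec   = alloc - nl;   // now vec[nl]..vec[nh] are valid
//     vec[nl] = 1.0;                // first element
//     free(vec + nl);               // un-bias before freeing
//
// Forming alloc - nl is technically out-of-bounds pointer arithmetic in
// strict ISO C++; it is shown here as this file's established
// convention, not a recommendation.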
// utAssign(): helper shared by the assign() overloads below; copies n
// entries of raw source data into the vector's [nl..nh] range.  (Only
// the count and the loop header survive in this listing; the copy body
// is reconstructed minimally.)
template <typename T, typename S>
static inline void
utAssign(const S *data, T *vector, exint nl, exint nh)
{
    const exint n = nh - nl + 1;
    T *dst = vector + nl;
    for (exint i = 0; i < n; i++)
        dst[i] = T(data[i]);
}
// assign() overloads (fpreal32 / fpreal64 source data):
UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
utAssign(data, myVector, nl, nh);

UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
utAssign(data, myVector, nl, nh);
// (Re)initialize the index range [nl..nh], reusing an owned allocation
// when it is large enough.
template <typename T>
// ...
if (myVector && myOwnData)
{
    if (nh-nl > myNH-myNL)
    {
        // Grow: realloc from the un-biased pointer, then re-bias to nl.
        myVector = (T *)realloc(myVector + myNL, (nh - nl + 1)*sizeof(T));
        myVector = myVector - nl;
    }
    else
    {
        // Large enough already: just re-bias the existing allocation.
        myVector = myVector + myNL - nl;
    }
}
else
{
    myVector = (T *)malloc((nh - nl + 1)*sizeof(T));
    myVector = myVector - nl;
}
template <typename T>
// ... (body elided in this listing)

template <typename T>
// ... (body elided in this listing)

// zeroPartial(): zero this job's slice of [nl..nh].
template <typename T>
// ...
UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
// ...
memset(myVector + start, 0, (end - start) * sizeof(T));
// constantInternalPartial(): fill this job's slice with the constant.
template <typename T>
// ...
UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
// ...

// constant(): fill [nl..nh] with the constant c.
template <typename T>
// ...
constantInternal(nl, nh, c);
// getSubvector2(): read two consecutive entries into a UT_Vector2.
template <typename T>
// ...
v.x() = (*this)(idx);
v.y() = (*this)(idx+1);

// setSubvector2(): write a UT_Vector2 into two consecutive entries.
template <typename T>
// ...
(*this)(idx) = v.x();
(*this)(idx+1) = v.y();
// getSubvector3() / setSubvector3(): same for three entries.
template <typename T>
// ...
v.x() = (*this)(idx);
v.y() = (*this)(idx+1);
v.z() = (*this)(idx+2);

template <typename T>
// ...
(*this)(idx) = v.x();
(*this)(idx+1) = v.y();
(*this)(idx+2) = v.z();

// getSubvector4() / setSubvector4(): same for four entries.
template <typename T>
// ...
v.x() = (*this)(idx);
v.y() = (*this)(idx+1);
v.z() = (*this)(idx+2);
v.w() = (*this)(idx+3);

template <typename T>
// ...
(*this)(idx) = v.x();
(*this)(idx+1) = v.y();
(*this)(idx+2) = v.z();
(*this)(idx+3) = v.w();
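// --- Illustrative sketch (not part of the original file) ---
// Packing and unpacking fixed-size UT_Vector3 values into a big vector,
// one 3-vector per point.  Assumes the range constructor declared in
// UT_Vector.h; npts and ptnum are placeholder names:
//
//     UT_VectorT<fpreal64> x(0, 3*npts - 1);
//     UT_Vector3 p(1.0, 2.0, 3.0);
//     x.setSubvector3(3*ptnum, p);      // store point ptnum
//     UT_Vector3 q;
//     x.getSubvector3(q, 3*ptnum);      // read it back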
// changeNL(): change the low index; the high index adjusts itself.
template <typename T>
// ...
exint diff = myNL - nl;
// ...
// norm(): blocked reduction.  Each PARALLEL_BLOCK_SIZE-sized block gets
// its own fpreal64 accumulator; the per-block partials are combined
// serially below, which keeps the result deterministic.
template <typename T>
// ...
exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
// ...
normInternal((fpreal64 *)accumulators, type);
// ...
for (exint i = 1; i < nblocks; ++i)
// ...
for (exint i = 1; i < nblocks; ++i)
    result += accumulators[i];
// ...
result = SYSsqrt(result);
// norm2(): squared L2 norm; the same reduction with the norm type fixed
// to 2 and no final square root.
template <typename T>
// ...
exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
// ...
normInternal((fpreal64 *)accumulators, 2);
// ...
for (exint i = 1; i < nblocks; ++i)
    result += accumulators[i];
// normInternal(): per-job partial norms.  The job's block range comes
// from getPartialBlockRange(); each block reduces into one accumulator,
// with one loop per supported norm type.
template <typename T>
// ...
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ...
    for (++i; i < blockend; ++i)
    // ...
}
// ...
for (exint block = startblock; block < endblock; ++block)
{
    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ... L1 norm: sum of absolute values
    for (++i; i < blockend; ++i)
        result += SYSabs(myVector[i]);
    // ...
}
// ...
for (exint block = startblock; block < endblock; ++block)
{
    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ...
    for (++i; i < blockend; ++i)
    // ...
}
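// --- Illustrative sketch (not part of the original file) ---
// The per-block accumulator pattern above, reduced to a standalone
// serial form.  Fixed-size blocks bound round-off growth and make the
// parallel sum independent of thread scheduling, because partials are
// always combined in block order.  blockedSum is a hypothetical name:
//
//     static fpreal64
//     blockedSum(const fpreal64 *data, exint n, exint blocksize)
//     {
//         fpreal64 total = 0;
//         for (exint start = 0; start < n; start += blocksize)
//         {
//             exint end = SYSmin(start + blocksize, n);
//             fpreal64 partial = 0;     // one accumulator per block
//             for (exint i = start; i < end; ++i)
//                 partial += data[i];
//             total += partial;         // combine in block order
//         }
//         return total;
//     }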
// distance2(): squared L2 distance between this and v, via the same
// blocked reduction.
template <typename T>
// ...
exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
// ...
distance2Internal((fpreal64 *)accumulators, v);
// ...
for (exint i = 1; i < nblocks; ++i)
    result += accumulators[i];
template <typename T>
// ...
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    fpreal64 diff = myVector[i] - v.myVector[i];
    // ...
    for (++i; i < blockend; ++i)
    {
        diff = myVector[i] - v.myVector[i];
        // ...
    }
}
// negPartial(): negate this job's slice.
template <typename T>
// ...
getPartialRange(i, end, info);
// ...
myVector[i] = -myVector[i];
// ...
for (exint j = i, jend = end; j < jend; j++)
    myVector[j] = -myVector[j];
// Computes this = v - this over the job's slice (cf. the fpreal32
// specialization below, which maps this to VM_Math::scaleoffset with a
// scale of -1).
template <typename T>
// ...
getPartialRange(i, end, info);
// ...
myVector[i] = v.myVector[i] - myVector[i];
// ...
for (exint j = i, jend = end; j < jend; j++)
    myVector[j] = v.myVector[j] - myVector[j];
// this += s * v over the job's slice: the classic axpy update.
template <typename T>
// ...
getPartialRange(i, end, info);
// ...
myVector[i] += s * v.myVector[i];
// ...
for (exint j = i, jend = end; j < jend; j++)
    myVector[j] += s * v.myVector[j];
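// --- Illustrative sketch (not part of the original file) ---
// The loop above is the standard axpy kernel.  A standalone scalar
// equivalent, with hypothetical names:
//
//     static void
//     axpy(fpreal32 s, const fpreal32 *v, fpreal32 *d, exint n)
//     {
//         for (exint i = 0; i < n; ++i)
//             d[i] += s * v[i];    // d = d + s*v
//     }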
// addScaledVecNorm2(): this += s*v while accumulating the squared L2
// norm of the updated vector into *norm2, in a single pass.
template <typename T>
// ...
exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
// ...
addScaledVecNorm2Internal(s, v, (fpreal64 *)accumulators);
// ...
for (exint i = 1; i < nblocks; ++i)
    result += accumulators[i];
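// --- Illustrative sketch (not part of the original file) ---
// Fusing the update with the norm saves a second pass over memory,
// which dominates for large vectors.  Usage with placeholder names,
// following the signature shown in UT_Vector.h:
//
//     fpreal64 rnorm2;
//     r.addScaledVecNorm2(-alpha, q, &rnorm2);  // r -= alpha*q; rnorm2 = |r|^2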
template <typename T>
// ...
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    T val = myVector[i] + s*v.myVector[i];
    // ...
    for (++i; i < blockend; ++i)
    {
        val = myVector[i] + s*v.myVector[i];
        // ...
    }
}
// addScaledVecNorm2UpTo(): as addScaledVecNorm2(), but only entries
// below normlimit contribute to the returned norm.
template <typename T>
// ...
if (!norm2 || normlimit <= 0)
// ...
exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
// ...
addScaledVecNorm2UpToInternal(s, v, (fpreal64 *)accumulators, normlimit);
// ...
for (exint i = 1; i < nblocks; ++i)
    result += accumulators[i];
template <typename T>
// ...
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // Entries below normlimit feed the accumulator ...
    for (; i < normmax; ++i)
    {
        T val = myVector[i] + s*v.myVector[i];
        // ...
    }
    // ... the rest are only updated.
    for (; i < blockend; ++i)
        myVector[i] += s*v.myVector[i];
}
// this = s*this + v over the job's slice.
template <typename T>
// ...
getPartialRange(i, end, info);
// ...
myVector[i] = s * myVector[i] + v.myVector[i];
// ...
for (exint j = i, jend = end; j < jend; j++)
    myVector[j] = s * myVector[j] + v.myVector[j];
// Componentwise this = a * b over the job's slice.
template <typename T>
// ...
getPartialRange(i, end, info);
// ...
myVector[i] = a.myVector[i] * b.myVector[i];
// ...
for (exint j = i, jend = end; j < jend; j++)
    myVector[j] = a.myVector[j] * b.myVector[j];
// multSetAndDotUpTo(): this = a*b componentwise, while accumulating
// dot(a*b, a) (the dot_aba result) for entries below dotlimit.
template <typename T>
// ...
exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
// ...
multSetAndDotUpToInternal(a, b, (fpreal64 *)accumulators, dotlimit);
// ...
for (exint i = 1; i < nblocks; ++i)
    result += accumulators[i];
template <typename T>
// ...
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ...
    for (; i < dotmax; ++i)
    {
        T av = a.myVector[i];
        T bv = b.myVector[i];
        // ...
    }
    // ...
    for (; i < blockend; ++i)
        myVector[i] += a.myVector[i]*b.myVector[i];
}
// Componentwise this = a / b over the job's slice.
template <typename T>
// ...
getPartialRange(i, end, info);
// ...
myVector[i] = a.myVector[i] / b.myVector[i];
// ...
for (exint j = i, jend = end; j < jend; j++)
    myVector[j] = a.myVector[j] / b.myVector[j];
// Componentwise this = 1/a, with SYSsafediv() guarding against
// division by zero.
template <typename T>
// ...
getPartialRange(i, end, info);
// ...
myVector[i] = SYSsafediv(T(1.0), a.myVector[i]);
// ...
for (exint j = i, jend = end; j < jend; j++)
    myVector[j] = SYSsafediv(T(1.0), a.myVector[j]);
// Componentwise this = 1/a (no zero guard).
template <typename T>
// ...
getPartialRange(i, end, info);
// ...
myVector[i] = T(1.0) / a.myVector[i];
// ...
for (exint j = i, jend = end; j < jend; j++)
    myVector[j] = T(1.0) / a.myVector[j];
template <typename T>
// ... (body elided in this listing)

// Copy the job's slice with a raw memcpy.
template <typename T>
// ...
getPartialRange(i, end, info);
// ...
memcpy(&myVector[i], &v.myVector[i], (end - i) * sizeof(T));
// Compound assignment operators walk the full inclusive range.
template <typename T>
// ...
for (i=myNL; i<=myNH; i++)              // operator+=
    myVector[i] += v.myVector[i];

template <typename T>
// ...
for (i=myNL; i<=myNH; i++)              // operator-=
    myVector[i] -= v.myVector[i];

template <typename T>
// ...
for (i=myNL; i<=myNH; i++)              // operator*= (componentwise)
    myVector[i] *= v.myVector[i];

template <typename T>
// ...
for (i=myNL; i<=myNH; i++)              // operator/= (componentwise)
    myVector[i] /= v.myVector[i];

template <typename T>
// ...
for (i=myNL; i<=myNH; i++)              // operator*= (scalar)
    myVector[i] *= scalar;

template <typename T>
// ...
scalar = 1.0F / scalar;                 // operator/= (scalar): one divide,
for (i=myNL; i<=myNH; i++)              // then multiply by the reciprocal
    myVector[i] *= scalar;
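// --- Illustrative sketch (not part of the original file) ---
// The operators compose like their scalar counterparts; all vectors
// must share the same [nl..nh] range.  Placeholder names throughout:
//
//     UT_VectorT<fpreal64> u(1, n), w(1, n);
//     // ... fill u and w ...
//     u += w;        // componentwise sum
//     u *= 2.0;      // scale every entry
//     u /= w;        // componentwise quotient (w must be nonzero)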
// dot(): blocked dot product; per-block partials are combined serially.
template <typename T>
// ...
exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
// ...
dotInternal((fpreal64 *)accumulators, v);
// ...
for (exint i = 1; i < nblocks; ++i)
    result += accumulators[i];
template <typename T>
// ...
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    fpreal64 result = myVector[i]*v.myVector[i];
    for (++i; i < blockend; ++i)
        result += myVector[i]*v.myVector[i];
    // ...
}
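// --- Illustrative sketch (not part of the original file) ---
// dot() and norm2() together give the usual building blocks, e.g. a
// cosine similarity between placeholder vectors u and w:
//
//     fpreal64 cosTheta = u.dot(w) / SYSsqrt(u.norm2() * w.norm2());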
// save(): write one "index: value" pair per line.
template <typename T>
// ...
for (i=myNL; i<=myNH; i++)
    os << i << ": " << myVector[i] << "\n";
// isEqual(): vectors are equal when their index ranges match and every
// pair of entries compares equal to within the given number of ULPs.
template <typename T>
// ...
if (getNL() != v.getNL() || getNH() != v.getNH())
// ...
for (exint i = getNL(); i <= getNH(); i++)   // the high index is inclusive
{
    if (!SYSalmostEqual((*this)(i), v(i), ulps))
    // ...
}
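// --- Illustrative sketch (not part of the original file) ---
// ULP-based comparison tolerates the small representation error left
// by floating-point arithmetic, unlike exact equality:
//
//     // After two algebraically equivalent computations of x1 and x2:
//     bool close = x1.isEqual(x2, 50);   // equal to within 50 ULPs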
// Scan the inclusive range for NaNs.
template <typename T>
// ...
for (exint i = myNL; i <= myNH; i++)
// ...

// NaN diagnostic.
template <typename T>
// ...
std::cerr << "NAN found in UT_VectorT\n";
// fpreal32 specializations: the same operations, routed through the
// VM_Math SIMD wrappers.

// negPartial(): this = -this.
getPartialRange(i, end, info);
// ...
VM_Math::negate(&myVector[i], &myVector[i], end-i);

// this = v - this, as scaleoffset with a scale of -1.
getPartialRange(i, end, info);
// ...
VM_Math::scaleoffset(&myVector[i], -1.0F, &v.myVector[i], end-i);

getPartialRange(i, end, info);
// ...
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ...
    fpreal64 result = VM_Math::maddAndNorm(&myVector[i], &v.myVector[i], s, blockend-i);
    // ...
}
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ...
    const exint normmax = SYSmin(blockend, normlimit);
    result = VM_Math::maddAndNorm(&myVector[i], &v.myVector[i], s, normmax-i);
    // ...
}
// this += s*v.
getPartialRange(i, end, info);
// ...
VM_Math::scaleoffset(&myVector[i], s, &v.myVector[i], end-i);

// this = a*b componentwise.
getPartialRange(i, end, info);
// ...
VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], end-i);
// multSetAndDotUpToInternal(): mulAndDotDA fuses the componentwise
// product with the partial dot; past dotlimit a plain mul suffices.
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ...
    result = VM_Math::mulAndDotDA(&myVector[i], &a.myVector[i], &b.myVector[i], dotmax-i);
    // ...
    VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], blockend-i);
    // ...
    VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], blockend-i);
}
// this = a/b componentwise.
getPartialRange(i, end, info);
// ...
VM_Math::div(&myVector[i], &a.myVector[i], &b.myVector[i], end-i);

// this = 1/a with guarded division.
getPartialRange(i, end, info);
// ...
VM_Math::safediv(&myVector[i], 1.0f, &a.myVector[i], end-i);

getPartialRange(i, end, info);
// ...

// Whole-range compound operators:
VM_Math::add(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);   // +=
// ...
VM_Math::sub(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);   // -=
// ...
VM_Math::mul(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);   // *=
// ...
VM_Math::div(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);   // /=
// ...
VM_Math::mul(&myVector[myNL], &myVector[myNL], scalar, myNH-myNL+1);              // *= scalar
// ...
VM_Math::div(&myVector[myNL], &myVector[myNL], scalar, myNH-myNL+1);              // /= scalar
// dotInternal() (fpreal32):
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ...
}

// normInternal() (fpreal32): one loop per norm type, as in the generic
// version above.
getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
// ...
exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
// ...
for (exint block = startblock; block < endblock; ++block)
{
    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ...
    for (++i; i < blockend; ++i)
    // ...
}
// ...
for (exint block = startblock; block < endblock; ++block)
{
    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ... L1 norm: sum of absolute values
    for (++i; i < blockend; ++i)
        result += SYSabs(myVector[i]);
    // ...
}
// ...
for (exint block = startblock; block < endblock; ++block)
{
    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
    // ...
}
// format(): stream the vector as "[v0, v1, ...]" through a local writer
// helper (defined in the elided body).
template <typename V>
inline size_t
format(char *buffer, size_t bufsize, const UT_VectorT<V> &v)
{
    // ...
    size_t n = writer("[", 1);
    // ...
    n += writer("]", 1);
    // ...
}

#define VECTOR_INSTANTIATE_FMT(T) \
    template UT_API size_t format<T>(char*, size_t, const UT_VectorT<T>&);
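// --- Illustrative sketch (not part of the original file) ---
// The macro stamps out explicit instantiations of format() for each
// exported element type, e.g.:
//
//     VECTOR_INSTANTIATE_FMT(fpreal32)
//     VECTOR_INSTANTIATE_FMT(fpreal64)
//
// where the second line expands to
//
//     template UT_API size_t format<fpreal64>(char*, size_t,
//                                             const UT_VectorT<fpreal64>&);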
// UT_PermutationT<T>::init(): allocate [nl..nh] and bias the base pointer.
template <typename T>
// ...
myVector = (T *)malloc((nh - nl + 1)*sizeof(T));
myVector = myVector - nl;

// ~UT_PermutationT(): un-bias before freeing.
template <typename T>
// ...
free(myVector + myNL);

template <typename T>
// ... (body elided in this listing)

template <typename T>
// ...
free(myVector + myNL);

// operator=: deep copy.
template <typename T>
// ...
myVector = (T *)malloc((myNH - myNL + 1)*sizeof(T));
myVector = myVector - myNL;
// ...
memcpy(myVector + myNL, p.myVector + myNL, (myNH - myNL + 1)*sizeof(T));

// zero()
template <typename T>
// ...
memset(myVector + myNL, 0, (myNH - myNL + 1)*sizeof(T));

// changeNL(): change the low index; the high index adjusts itself.
template <typename T>
// ...
exint diff = myNL - nl;
// ...