HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CE_Grid.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: CE_Grid.h ( CE Library, C++)
7  *
8  * COMMENTS: Compute Engine Grid.
9  */
10 
11 #ifndef __CE_Grid__
12 #define __CE_Grid__
13 
14 #include "CE_API.h"
15 
16 #ifdef CE_ENABLED
17 
18 #include <UT/UT_VoxelArray.h>
19 #include <UT/UT_String.h>
20 
21 /// This class represents a 3-dimensional array of float values stored on an
22 /// OpenCL device. It is roughly analagous to UT_VoxelArray, although it does
23 /// not support tiling. There are various convenience functions for simplifying
24 /// invocations of OpenCL kernels that operate on grids of values.
25 ///
26 /// CE_Grid supports an arbitrary number of "ghost" cells, which
27 /// are cells at the boundary edges of the grid that can be set according to
28 /// different UT_VoxelBorderType values, as well as arbitrary number of
29 /// "padding" cells. The padding cells can be used to ensure that several
30 /// grids with slightly different resolutions can nonetheless
31 /// have indentical x, y, and z stride values. This makes writing
32 /// kernels that operate on grids that represent face-sampled (MAC) vector
33 /// array much simpler, since the index into the different grids need only be
34 /// calculated once.
35 ///
36 /// Invoking an OpenCL kernel typically involves one of the bind() functions to
37 /// bind the kernel to the range of work items in the grid, assuming one OpenCL
38 /// work item per voxel. There are three different variants of bind(), the
39 /// default assumes the grid is always 3-dimensional; bind2D() which assumes the
40 /// grid has a 2-dimensional axis, and bind2D3D(), which will treat a
41 /// 3-dimensional grid normally, but flatten a 2-d grid for optimal performance
42 /// on OpenCL devices that prefer a large number of work items in the first
43 /// dimension. This last function is the preferred method to call if the grid
44 /// might represent 2D or 3D data.
45 ///
46 /// A simple OpenCL kernel that doubles every value in a CE_Grid might look
47 /// like:
48 /// @code
49 /// void __kernel doubleit(__global float *grid,
50 /// uint offset, uint xstride, uint ystride, uint zstride)
51 /// {
52 /// size_t idx = offset + get_global_id(0) * xstride +
53 /// get_global_id(1) * ystride +
54 /// get_global_id(2) * zstride;
55 /// grid[idx] *= 2;
56 /// }
57 /// @endcode
58 ///
59 /// Because this kernel takes x, y, and z strides, it can be used for 2D and 3D
60 /// data: in the 2D case zstride will always be 0.
61 /// Invoking this kernel then typically looks like:
62 /// @code
63 /// CE_Context *context = CE_Context::getContext();
64 /// cl::Program prog = context->loadProgram("mykernels.cl");
65 /// cl::KernelFunctor doubleit = myGrid.bind2D3D(prog, "doubleit");
66 /// doubleit(myGrid.buffer(), myGrid.getOffset(),
67 /// myGrid.getXStride2D3D(),
68 /// myGrid.getYStride2D3D(),
69 /// myGrid.getZStride2D3D());
70 /// @endcode
72 {
73 public:
74  CE_Grid();
75  CE_Grid(const CE_Grid &src);
76  virtual ~CE_Grid();
77 
78  /// Return the underlying OpenCL buffer that can be used in kernel
79  /// invocations. It allocates this buffer only on demand.
80  const cl::Buffer &buffer() const
81  {
82  if (hasBuffer())
83  return myBuffer;
84  return allocBuffer();
85  }
86 
87  /// If the current OpenCL buffer is valid.
88  bool hasBuffer() const {return (myBuffer() != 0);}
89 
90  /// Size the grid as specified. Note this does not actually allocate
91  /// memory on the OpenCL device.
92  void size(int xres, int yres, int zres,
93  int xghost = 1, int yghost = 1, int zghost = 1,
94  int xpad = 1, int ypad = 1, int zpad = 1);
95 
96  int getXRes() const { return myRes[0]; }
97  int getYRes() const { return myRes[1]; }
98  int getZRes() const { return myRes[2]; }
99 
100  int getRes(int dim) const { return myRes[dim]; }
101  UT_Vector3I getRes() const { return myRes; }
102 
103  UT_Vector3I getGhostRes() const { return myGhostCells; }
104  int getGhostRes(int dim) const { return myGhostCells[dim]; }
105 
106  UT_Vector3I getPadding() const { return myPadding; }
107  int getPadding(int dim) const { return myPadding[dim]; }
108 
109  /// Returns the offset from the beginning of the buffer to the beginning
110  /// of actual data.
111  int getOffset() const
112  {
113  return myGhostCells[0] +
114  myGhostCells[1] * myStrides[1] +
115  myGhostCells[2] * myStrides[2];
116  }
117  /// Returns the offset from the beginning of the buffer to the beginning
118  /// of the data including ghost cells.
119  int getGhostOffset() const;
120 
121  /// Like the identically-named function in UT_VoxelArray, set the values
122  /// that determine grid border behaviour. Note that calling these functions
123  /// does not actually set the ghost cell values; updateBorderCells() does
124  /// that.
125  void setBorder(UT_VoxelBorderType type, fpreal32 t);
126  UT_VoxelBorderType getBorder() const { return myBorderType; }
127  fpreal32 getBorderValue() const { return myBorderValue; }
128  void setBorderScale(fpreal32 scalex,
129  fpreal32 scaley,
130  fpreal32 scalez);
131  fpreal32 getBorderScale(int axis) const
132  { return myBorderScale[axis]; }
133 
134  /// Converts the given voxel coordinates to the normalized [0..1]^3 space,
135  /// similar to the function of the same name on UT_VoxelArray.
136  bool indexToPos(int x, int y, int z, UT_Vector3F &pos) const;
137 
138  /// Invoke a series of kernels to fill the ghost cells along each axis with
139  /// the proper values according to the border value and type.
140  void updateBorderCells() const;
141 
142  /// Returns the number of data voxels in the grid.
143  int64 numVoxels() const
144  { return ((int64)myRes[0]) * myRes[1] * myRes[2]; }
145 
146  /// Returns the number of total voxels in the grid, including ghost cells
147  /// and padding.
149  {
150  return (int64)(myRes[0] + 2 * myGhostCells[0] + myPadding[0]) *
151  (int64)(myRes[1] + 2 * myGhostCells[1] + myPadding[1]) *
152  (int64)(myRes[2] + 2 * myGhostCells[2] + myPadding[2]);
153  }
154 
155  /// Returns the memory required by the entire grid, including ghost cells
156  /// and padding.
158  { return sizeof(fpreal32) * numTotalVoxels(); }
159 
160  /// Initialize the CE_Grid from the supplied UT_VoxelArray. Note that
161  /// in the case that UT_VoxelArray::isConstant(), this will be very fast
162  /// and avoid allocating any actual GPU memory.
163  void initFromVoxels(const UT_VoxelArrayF &src,
164  int xghost = 1, int yghost = 1, int zghost = 1,
165  int xpad = 1, int ypad = 1, int zpad = 1);
166 
167  /// Match destination UT_VoxelArray to this CE_Grid and copy data. Note
168  /// if isConstant() is true, this is very fast. includeGhostCells can be
169  /// set to include the ghost cell values in the sizing and copy operations.
170  void matchAndCopyToVoxels(UT_VoxelArrayF& dest,
171  bool includeGhostCells = false) const;
172 
173  /// Match the src CE_Grid in terms of size and border conditions.
174  void match(const CE_Grid &src);
175 
176  /// Returns true if this CE_Grid matches src in terms of size and
177  /// border conditions.
178  bool isMatching(const CE_Grid &src) const;
179 
180  /// Returns true if this CE_Grid matches src in terms of offset and strides.
181  bool isCongruent(const CE_Grid& src) const;
182 
183  int getXStride() const {return myStrides[0];}
184  int getYStride() const {return myStrides[1];}
185  int getZStride() const {return myStrides[2];}
186  int getStride(int dim) const { return myStrides[dim]; }
187 
188  UT_Vector3I getStrides() const { return myStrides; }
189 
190  /// Returns the stride along axis including ghost cells.
191  int getGhostStride(int axis) const;
192 
193  /// Returns whether the specified axis is 2-dimensional.
194  bool isAxis2D(int axis) const
195  {
196  return (myRes[axis] == 1);
197  }
198 
199  /// Returns the 2-dimensional axis of this grid, or -1 if there is none.
200  int getAxis2D() const {return myAxis2d;}
201 
202  /// When flattening a grid to 2-dimensions, this is the axis to treat as
203  /// the x-axis, usually when using bind2D.
204  static int getXAxis2D(int axis2d)
205  {
206  return (axis2d > 0) ? 0 : 1;
207  }
208 
209  /// When flattening a grid to 2-dimensions, this is the axis to treat as
210  /// the y-axis, usually when using bind2D.
211  static int getYAxis2D(int axis2d)
212  {
213  return (axis2d < 2) ? 2 : 1;
214  }
215 
216  /// 2D strides.
217  int getXStride2D(int axis2d) const {return myStrides[getXAxis2D(axis2d)];}
218  int getYStride2D(int axis2d) const {return myStrides[getYAxis2D(axis2d)];}
219 
220  /// Returns the true x-stride if the grid is 3D, or the flattened x-stride
221  /// if 2D.
222  int getXStride2D3D() const
223  {
224  if (myAxis2d == -1)
225  return myStrides[0];
226  return getXStride2D(myAxis2d);
227  }
228 
229  /// Returns the true Y-stride if the grid is 3D, or the flattened y-stride
230  /// if 2D.
231  int getYStride2D3D() const
232  {
233  if (myAxis2d == -1)
234  return myStrides[1];
235  return getYStride2D(myAxis2d);
236  }
237 
238  /// Returns the true Z-stride if the grid is 3D, or 0 if 2D.
239  int getZStride2D3D() const
240  {
241  if (myAxis2d == -1)
242  return myStrides[2];
243  return 0;
244  }
245 
246  /// Returns the true x resolution if the grid is 3D, or the flattened x
247  /// resolution if 2D.
248  int getXRes2D3D() const
249  {
250  if (myAxis2d == -1)
251  return myRes[0];
252  return myRes[getXAxis2D(myAxis2d)];
253  }
254 
255  /// Returns the true y resolution if the grid is 3D, or the flattened y
256  /// resolution if 2D.
257  int getYRes2D3D() const
258  {
259  if (myAxis2d == -1)
260  return myRes[1];
261  return myRes[getYAxis2D(myAxis2d)];
262  }
263 
264  /// Returns the true z resolution if the grid is 3D, or 1 if 2D.
265  int getZRes2D3D() const
266  {
267  if (myAxis2d == -1)
268  return myRes[2];
269  return 1;
270  }
271 
272  /// Set this grid to the specified constant value. This will release the
273  /// underlying OpenCL buffer and store the constant value in the CE_Grid
274  /// object.
275  void constant(fpreal32 v);
276 
277  void zero() { constant(0); }
278 
279  /// Returns whether the grid is set to a constant value. If checkBorders is
280  /// true, checks that the border type and value are equal as well.
281  bool isConstant(fpreal32 *cval = 0, bool checkBorders = false) const;
282 
283  /// Copy data from the source grid. This requires that the grids have the
284  /// same data resolution, but ghost cells, padding, borders, etc. can differ.
285  void copyData(const CE_Grid &src);
286 
287  /// Assign to this, equivalent to match(src), followed by copyData(src).
288  CE_Grid &operator=(const CE_Grid &src);
289 
290  /// Steal the buffer from the other grid, leaving it unitialized.
291  void stealBuffer(CE_Grid &src);
292 
293  /// Add the source array values to this. Calls linearCombination() and
294  /// requires isCongruent(src).
295  CE_Grid &operator +=(const CE_Grid &src);
296 
297  /// Return an OpenCL NDRange comprised of the entire grid resolution,
298  /// implying one OpenCL work item per voxel.
300  {
301  return cl::NDRange(getXRes(),
302  getYRes(),
303  getZRes());
304  }
305 
306  /// Return an OpenCL NDRange comprised of the flattened 2D grid resolution,
307  /// implying one OpenCL work item per voxel.
308  cl::NDRange getGlobalRange2D(int axis2d) const
309  {
310  return cl::NDRange(myRes[getXAxis2D(axis2d)],
311  myRes[getYAxis2D(axis2d)]);
312  }
313 
314  /// Return an OpenCL NDRange comprised of the entire 3D grid resolution if
315  /// the grid is 3D, else the flattened 2D grid resolution.
317  {
318  return (myAxis2d == -1) ? getGlobalRange() :
319  getGlobalRange2D(myAxis2d);
320  }
321 
322  /// Create a local work item range for the supplied global range.
323  static cl::NDRange getLocalRange(const cl::NDRange &g);
325  { return getLocalRange(getGlobalRange2D3D()); }
326 
327  /// Bind a 3D kernel, with one work item per voxel.
328  /// If a local range is provided, the global range will be padded to ensure
329  /// compliance; in this case, the kernel must manually perform bound checks!
330  /// lrange can point at an array, in which case n specifies its size.
332  const cl::NDRange* lrange = nullptr,
333  int n = 1) const;
334  cl::KernelFunctor bind(cl::Program prog,
335  const char *kernelname) const;
336 
337  /// Bind a 2D kernel, treating the provided axis as the flat one,
338  /// with one work item per voxel. This also allows calling 2d kernels along
339  /// slices of a 3D grid.
340  cl::KernelFunctor bind2D(int axis, cl::Kernel k) const;
341  cl::KernelFunctor bind2D(int axis, cl::Program prog,
342  const char *kernelname) const;
343 
344  /// Bind a 2D-3D kernel, which should take x-, y-, and z-strides as
345  /// as parameters, automatically flattening a 2D grid if necessary.
346  cl::KernelFunctor bind2D3D(cl::Kernel k) const;
347  cl::KernelFunctor bind2D3D(cl::Program prog,
348  const char *kernelname) const;
349 
350  /// Enqueue kernel that stores the linear combination
351  /// g0 + c1 * g1
352  /// where this is g0. g1 is assumed to be center-sampled, and g0 is
353  /// corner-sampled around g1. Thus, g1 is 8-way (4-way for 2D) averaged
354  /// before the operation.
355  /// Requires this grid to have one more voxel per live dimension.
356  void scaledAddCornerFromCenter(fpreal32 c1, const CE_Grid &g1);
357 
358  /// Enqueue kernel that stores the linear combination
359  /// c0 * g0 + d.
360  /// Requires isCongruent(g0).
361  void linearCombination(fpreal32 c0, const CE_Grid &g0, fpreal32 d);
362 
363  /// Enqueue kernel that stores the linear combination
364  /// c0 * g0 + c1 * g1 + d.
365  /// Requires isCongruent(g0) && isCongruent(g1).
366  void linearCombination(fpreal32 c0, const CE_Grid &g0,
367  fpreal32 c1, const CE_Grid &g1,
368  fpreal32 d);
369 
370  /// Enqueue kernel that stores the linear combination
371  /// c0 * g0 + c1 * g1 + c2 * g2 + d
372  /// Requires isCongruent(g0) && isCongruent(g1) && isCongruent(g2)
373  void linearCombination(fpreal32 c0, const CE_Grid &g0,
374  fpreal32 c1, const CE_Grid &g1,
375  fpreal32 c2, const CE_Grid &g2,
376  fpreal32 d);
377 
378  /// Compute scale * divergence of the vector field represented by the
379  /// supplied grids and voxelsize.
380  void divergence(const CE_Grid &x, const CE_Grid &y, const CE_Grid &z,
381  fpreal32 scale, UT_Vector3 voxelSize);
382 
383  /// Compute scale * divergence of the vector field represented by the
384  /// supplied grids and voxelsize; divergence is calculated at corners of
385  /// the center-sampled vector field.
386  void divergenceCenterToCorner(const CE_Grid &x, const CE_Grid &y,
387  const CE_Grid &z, fpreal32 scale,
388  UT_Vector3 voxelSize);
389 
390  /// Add scale * gradient of the supplied field along the supplied axis
391  /// and voxelsize.
392  void applyGradient(const CE_Grid &p, fpreal32 scale,
393  fpreal32 voxelSize, int axis);
394 
395  /// Add scale * gradient of the supplied field along the supplied axis
396  /// and voxelsize. Assumes this grid is center-sampled, and p is sampled
397  /// at its corners.
398  void applyGradientCornerToCenter(const CE_Grid &p, fpreal32 scale,
399  fpreal32 voxelsize, int axis);
400 
401  /// Reductions of the grid to a single value.
402  fpreal64 sum() const;
403  fpreal64 sumAbs() const;
404  fpreal64 sumSqr() const;
405  fpreal64 min() const;
406  fpreal64 minAbs() const;
407  fpreal64 max() const;
408  fpreal64 maxAbs() const;
409  fpreal64 average() const {return sum() / numVoxels();}
410 
411  fpreal64 localAverage(UT_Vector3I &radius);
412  fpreal64 localSum( UT_Vector3I &radius);
413  fpreal64 localSumSqr( UT_Vector3I &radius);
414  fpreal64 localSumAbs( UT_Vector3I &radius);
415  fpreal64 localMin( UT_Vector3I &radius);
416  fpreal64 localMinAbs( UT_Vector3I &radius);
417  fpreal64 localMax( UT_Vector3I &radius);
418  fpreal64 localMaxAbs( UT_Vector3I &radius);
419 
420  void boxBlur(int passes, UT_Vector3 radius);
421 
422  /// Compute the infinity-norm and 2-norm of the grid.
423  void computeNorms(fpreal64 &norminf, fpreal64 &norm2) const;
424 protected:
425 
426  const cl::Buffer &allocBuffer() const;
427  void releaseBuffer();
428 
429  void setValue(fpreal32 cval) const;
430 
431  // Reduction helpers
432  void getReductionRanges(const cl::Kernel &k,
433  cl::NDRange &globalRange, cl::NDRange &localRange,
434  uint &groupsize, uint &ngroups,
435  size_t &accumsize) const;
436  fpreal64 reduceFlat(cl::Buffer outgrid, uint groupsize, uint ngroups,
437  size_t accumsize, const char *reduceFlags) const;
438 
439  // Main reduction function.
440  fpreal64 doReduce(const char* reduceFlags) const;
441  bool doLocalReduce( const char * options, UT_Vector3I &radius );
442 
444  mutable bool myIsConstant;
449  int myAxis2d;
450 
455 };
456 
457 
458 #else
459 
460 class CE_API CE_Grid
461 {
462 };
463 
464 #endif
465 #endif
fpreal32 myConstantVal
Definition: CE_Grid.h:445
#define CE_API
Definition: CE_API.h:11
int getYStride2D(int axis2d) const
Definition: CE_Grid.h:218
int64 numVoxels() const
Returns the number of data voxels in the grid.
Definition: CE_Grid.h:143
int myAxis2d
Definition: CE_Grid.h:449
cl::NDRange getGlobalRange2D3D() const
Definition: CE_Grid.h:316
UT_VoxelBorderType myBorderType
Definition: CE_Grid.h:453
int getXRes() const
Definition: CE_Grid.h:96
int getYRes() const
Definition: CE_Grid.h:97
int getRes(int dim) const
Definition: CE_Grid.h:100
const GLdouble * v
Definition: glcorearb.h:837
GLdouble GLdouble GLdouble z
Definition: glcorearb.h:848
GLboolean GLboolean g
Definition: glcorearb.h:1222
int getXStride2D3D() const
Definition: CE_Grid.h:222
UT_Vector3I getGhostRes() const
Definition: CE_Grid.h:103
UT_VoxelBorderType
Definition: UT_VoxelArray.h:70
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
GLint y
Definition: glcorearb.h:103
fpreal32 myBorderValue
Definition: CE_Grid.h:452
UT_Vector3I myStrides
Definition: CE_Grid.h:451
__hostdev__ void setValue(uint32_t offset, bool v)
Definition: NanoVDB.h:5750
UT_Vector3I myRes
Definition: CE_Grid.h:446
int getXRes2D3D() const
Definition: CE_Grid.h:248
float fpreal32
Definition: SYS_Types.h:200
UT_Vector3I myPadding
Definition: CE_Grid.h:448
int getStride(int dim) const
Definition: CE_Grid.h:186
UT_Vector3I myGhostCells
Definition: CE_Grid.h:447
double fpreal64
Definition: SYS_Types.h:201
int getYStride2D3D() const
Definition: CE_Grid.h:231
int64 totalVoxelMemory() const
Definition: CE_Grid.h:157
GA_API const UT_StringHolder scale
GLdouble n
Definition: glcorearb.h:2008
UT_Vector3F myBorderScale
Definition: CE_Grid.h:454
fpreal32 getBorderScale(int axis) const
Definition: CE_Grid.h:131
UT_VoxelBorderType getBorder() const
Definition: CE_Grid.h:126
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4369
cl::NDRange getGlobalRange2D(int axis2d) const
Definition: CE_Grid.h:308
int64 numTotalVoxels() const
Definition: CE_Grid.h:148
void zero()
Definition: CE_Grid.h:277
long long int64
Definition: SYS_Types.h:116
int getOffset() const
Definition: CE_Grid.h:111
int getXStride() const
Definition: CE_Grid.h:183
bool isAxis2D(int axis) const
Returns whether the specified axis is 2-dimensional.
Definition: CE_Grid.h:194
int getXStride2D(int axis2d) const
2D strides.
Definition: CE_Grid.h:217
int getZRes() const
Definition: CE_Grid.h:98
int getGhostRes(int dim) const
Definition: CE_Grid.h:104
UT_Vector3I getStrides() const
Definition: CE_Grid.h:188
static int getXAxis2D(int axis2d)
Definition: CE_Grid.h:204
GLint GLenum GLint x
Definition: glcorearb.h:409
cl::NDRange getLocalRange2D3D() const
Definition: CE_Grid.h:324
bool hasBuffer() const
If the current OpenCL buffer is valid.
Definition: CE_Grid.h:88
GLdouble t
Definition: glad.h:2397
int getZRes2D3D() const
Returns the true z resolution if the grid is 3D, or 1 if 2D.
Definition: CE_Grid.h:265
GLsizeiptr size
Definition: glcorearb.h:664
fpreal64 average() const
Definition: CE_Grid.h:409
LeafData & operator=(const LeafData &)=delete
UT_Vector3I getPadding() const
Definition: CE_Grid.h:106
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
Kernel functor interface.
Definition: cl.hpp:3585
fpreal32 getBorderValue() const
Definition: CE_Grid.h:127
int getAxis2D() const
Returns the 2-dimensional axis of this grid, or -1 if there is none.
Definition: CE_Grid.h:200
bool myIsConstant
Definition: CE_Grid.h:444
Memory buffer interface.
Definition: cl.hpp:1867
static int getYAxis2D(int axis2d)
Definition: CE_Grid.h:211
NDRange interface.
Definition: cl.hpp:2466
int getYRes2D3D() const
Definition: CE_Grid.h:257
int getZStride2D3D() const
Returns the true Z-stride if the grid is 3D, or 0 if 2D.
Definition: CE_Grid.h:239
Kernel interface that implements cl_kernel.
Definition: cl.hpp:2544
int getPadding(int dim) const
Definition: CE_Grid.h:107
VectorToScalarConverter< GridType >::Type::Ptr divergence(const GridType &grid, bool threaded, InterruptT *interrupt)
Compute the divergence of the given vector-valued grid.
int getZStride() const
Definition: CE_Grid.h:185
UT_Vector3I getRes() const
Definition: CE_Grid.h:101
type
Definition: core.h:1059
Program interface that implements cl_program.
Definition: cl.hpp:2649
cl::Buffer myBuffer
Definition: CE_Grid.h:443
unsigned int uint
Definition: SYS_Types.h:45
int getYStride() const
Definition: CE_Grid.h:184
const cl::Buffer & buffer() const
Definition: CE_Grid.h:80
cl::NDRange getGlobalRange() const
Definition: CE_Grid.h:299
GLenum src
Definition: glcorearb.h:1793