HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CE_Context.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: CE_Context.h ( CE Library, C++)
7  *
8  * COMMENTS: Compute Engine Contexts.
9  */
10 
11 #ifndef __CE_Context__
12 #define __CE_Context__
13 
14 #include "CE_API.h"
15 
16 #ifdef CE_ENABLED
17 
18 #include "CE_Tracing.h"
19 
20 #include <UT/UT_Array.h>
21 #include <UT/UT_Error.h>
22 #include <UT/UT_Map.h>
23 #include <UT/UT_NonCopyable.h>
24 #include <UT/UT_StringMap.h>
25 #include <SYS/SYS_Types.h>
26 #include <SYS/SYS_Handle.h>
27 #include <iosfwd>
28 
29 class CE_MemoryPool;
30 
31 typedef void (*CE_ErrorCB)(const char *errmsg, UT_ErrorSeverity severity,
32  void *data);
33 
35 {
36 public:
39 
41 
42  virtual void rebindOGLBuffer( uint buf_obj ) = 0;
43  virtual void unbindOGLBuffer() = 0;
44  virtual bool isBinded() = 0;
45 };
46 
47 /// An OpenCL buffer that is backed by external memory. External memory is accessed
48 /// through a platform specific memory handle.
49 /// Requires that the OpenCL driver implement the following extensions:
50 /// cl_khr_external_memory
51 /// cl_khr_external_memory_opaque_fd (only on Linux and possibly Mac)
52 /// cl_khr_external_memory_win32 (only on Windows)
53 class CE_API CE_ExternalBuffer : public cl::Buffer
54 {
55 public:
57  const cl::Context &context,
59  ::size_t size,
61  cl_int *err = nullptr);
62 
63  CE_ExternalBuffer() : cl::Buffer() {}
64 };
65 /// An OpenCL image that is backed by external memory, similar to
66 /// CE_ExternalBuffer.
68 {
69 public:
71  const cl::Context &context,
73  const cl_image_format *format,
74  const cl_image_desc *desc,
76  cl_int *err = nullptr);
77 
78  CE_ExternalImage() : cl::Image() {}
79 };
80 
81 #ifndef CL_UUID_SIZE_KHR
82 #define CL_UUID_SIZE_KHR 16
83 #endif
84 
85 /// CE_Context encapsulates the OpenCL context and provides various convenience
86 /// functions for loading kernel programs and allocating GPU memory.
88 {
89 public:
90  CE_Context();
91  virtual ~CE_Context();
92 
94 
95  /// Returns a pointer to the singleton CE_Context object. This function
96  /// attempts to initialize OpenCL if it has not yet been.
97  /// gl_shared should be true if the context will be expected to interoperate
98  /// with the OpenGL context. If both gl_shared and shared_fallback are true,
99  /// then the function will try to make an unshared context in case the
100  /// shared context fails to create.
101  static CE_Context *getContext(bool gl_shared = true,
102  bool shared_fallback = true);
103  /// Returns true if interoperability between CL and GL is possible.
104  static bool isGLSharingPossible();
105 
106  /// Returns the underlying cl::Context object.
107  cl::Context getCLContext() const {return myContext;}
108 
109  /// Returns the cl::Queue object that is used to enqueue OpenCL kernels
110  /// and memory transfers.
111  cl::CommandQueue getQueue() const {return myQueue;}
112 
113  /// Returns the OpenCL Device object.
114  cl::Device getDevice() const {return myDevice;}
115 
116  ceTraceCtx getTraceContext() const {return myTraceCtx;}
117 
118  // Write OpenCL Device info to the supplied buffer.
119  static void getInfo(const cl::Device &device, UT_WorkBuffer &buffer );
120  static void getExtendedInfo(const cl::Device &device, UT_WorkBuffer &buffer );
121 
122  // Write info for all available OpenCL platforms to the supplied buffer.
123  static void getAllPlatformsInfo(UT_WorkBuffer &buffer);
124 
125  /// Get the suggested global and local ranges for the given 1-D kernel over
126  /// the specified number of items.
127  void get1DRanges(const cl::Kernel &k, size_t items,
128  cl::NDRange &g, cl::NDRange &l);
129 
130  /// Get the maximum workgroup size for the given kernel.
131  size_t getMaxWorkgroupSize(const cl::Kernel &k);
132  /// Get the array of maximum work items along each dimension supported by the
133  /// compute device.
134  std::vector<size_t> getMaxWorkItemSizes();
135 
136  /// Round up a provided group size to a larger clean one as some
137  /// drivers die with prime-based group sizes.
138  /// Less than 1024 is raised to next power of 2, greater is to a multiple
139  /// of 1024.
140  static size_t roundUpGroupSize(size_t gsize);
141 
142  /// Loads the OpenCL program specified by progname. This function searches
143  /// for the file in the HOUDINI_OCL_PATH environment variable. Any compile-
144  /// time options can be passed in the options parameter. If the program
145  /// load succeeds, the progname will be cached, using the progname and
146  /// options strings together as a hash value lookup. In this way the same
147  /// OpenCL program can be loaded several times with different compile-time
148  /// flags.
149  cl::Program loadProgram(const char *progname, const char *options = NULL,
150  bool recompile = false);
151  cl::Program compileProgram(const char *progtext, const char *options = NULL,
152  bool recompile = false);
153 
154  /// Create an OpenCL kernel named kernelname from the program specified by
155  /// progname. For some types of devices these kernels will be cached, as
156  /// kernels can be expensive to create. This is the recommended method
157  /// for creating kernels.
158  cl::Kernel loadKernel(const cl::Program &prog, const UT_StringRef &kernelname);
159  cl::Kernel loadKernel(const char *progname, const UT_StringRef &kernelname,
160  const char *options = NULL)
161  { return loadKernel(loadProgram(progname, options), kernelname); }
162 
163  /// Returns whether the CE_Context has been successfully initialized.
164  bool isValid() const {return myIsValid;}
165 
166  /// Returns whether the singleton CE_Context has been initialized yet. This
167  /// can be used to test whether OpenCL has been initialized without calling
168  /// getContext and forcing an attempt at initialization.
169  static bool isInitialized(bool gl_shared=false);
170 
171  /// Returns true if the OpenCL device is running on the CPU.
172  bool isCPU() const;
173 
174  /// Returns true if the OpenCL device supports double precision.
175  bool hasDoubleSupport() const {return mySupportsDouble;}
176  /// Returns true if the OpenCL device supports writing to 3D image objects.
177  bool has3DImageWriteSupport() const {return mySupports3DImageWrites;}
178 
179  /// Block until any outstanding kernel or memory transfers on the main
180  /// CommandQueue have executed. If sweepPool is true, the context's
181  /// CE_MemoryPool will sweep for any buffers that were in use when their
182  /// CE_Grid's went out of scope, but that were still active in kernels.
183  void finish(bool sweepPool=true);
184 
185  /// Allocate a buffer of specified size on the CE_Device.
186  /// usePool= true, attempts to use the underlying CE_MemoryPool to possibly return
187  /// an already allocated, unused buffer.
188  /// read=true, creates a buffer that is readable inside kernels.
189  /// write=true, creates a buffer that is writable inside kernels.
190  /// ogl_bind, specifies an OGL buffer to bind to.
191  cl::Buffer allocBuffer(int64 size, bool usePool=true, bool read=true, bool write=true, uint32 ogl_bind=SYS_UINT32_MAX);
192 
193  /// Release the specified buffer, possibly to the CE_MemoryPool.
194  void releaseBuffer(cl::Buffer &&buf);
195 
196  /// Read the specified number of bytes from the buffer.
197  void readBuffer(const cl::Buffer &buf, size_t size, void *p, bool blocking = true, size_t offset = 0);
198 
199  /// Write the specified number of bytes to the buffer.
200  void writeBuffer(const cl::Buffer &buf, size_t size, const void *p, bool blocking = true, size_t offset = 0);
201 
202  /// Enqueue the kernel over the provided ranges.
203  void enqueueKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local);
204 
205  /// Keep a map buffer to bind at render time
206  /// The first time a CL::Buffer is created it can be registered to rebind to an OGL vertex buffer at drawing time.
207  /// The uint returned by the register call can be attached to a detail attribute and the drawing code can convert
208  /// the CL Buffer to a CL BufferGL.
209  uint32 registerDelayedOGLBindBuffer(CE_DelayedOGLBindBuffer* buffer);
210  void unregisterDelayedOGLBindBuffer(uint32 id);
211  CE_DelayedOGLBindBuffer* lookupDelayedOGLBindBuffer( uint id );
212 
213  /// Returns true if the context supports querying for device and driver
214  /// UUIDs that are unique across APIs (allowing for example, to match Vulkan
215  /// device selection). This requires the cl_khr_device_uuid extension.
216  bool supportsUUID();
217  /// Writes the OpenCL device UUID. Check that supportsUUID() returns true
218  /// before trying to query the UUID.
219  void getDeviceUUID(cl_uchar (&uuid)[CL_UUID_SIZE_KHR]);
220  /// Writes the OpenCL driver UUID. Check that supportsUUID() returns true
221  /// before trying to query the UUID.
222  void getDriverUUID(cl_uchar (&uuid)[CL_UUID_SIZE_KHR]);
223 
224  /// Returns true if the context supports the creation of buffers backed by
225  /// external memory. Mainly for use in sharing buffers with Vulkan.
226  bool supportsExternalMemory();
227 
228  /// Create a buffer backed by external memory. The handle will be the reference
229  /// to the actual memory object, and its lifetime is externally managed. The
230  /// memory might be owned by another GPU API, such as Vulkan. In those cases,
231  /// that handle can be obtained by a call to
232  /// vkGetMemoryFdKHR or vkGetMemoryWin32HandleKHR
233  /// This requires a few extensions to be available. Check that the context
234  /// supports these by calling CE_Context::supportsExternalMemory first.
235  CE_ExternalBuffer createExternalMemoryBuffer(SYS_Handle handle, int64_t size, bool read=true, bool write=true);
236 
237  CE_ExternalImage createExternalImage(SYS_Handle handle,
238  const cl_image_format& format,
239  const cl_image_desc& image_desc,
240  bool read = true, bool write = true);
241 
242  /// Clear the CE_MemoryPool object.
243  void clearMemoryPool();
244 
245  /// Return a pointer to pinned (page-locked) host memory. On some devices
246  /// (Nvidia), using this type of memory for the PCI/E host/device transfers
247  /// can double the throughput. Will return NULL if the memory can't be
248  /// allocated, or if the device is not a GPU.
249  fpreal32 *getPinnedBuffer(int64 size);
250 
251  cl::Buffer getXNoiseData();
252 
253  /// Standard error reporting for OpenCL exceptions. They should generally
254  /// take the form:
255  /// @code
256  /// try
257  /// {
258  /// OpenCL calls...
259  /// }
260  /// catch(cl::Error &err)
261  /// {
262  /// CE_Context::reportError(err);
263  /// ///cleanup
264  /// }
265  /// @endcode
266  /// This will not capture delayed errors, however. Instead
267  /// you will need to add a callback to intercept them.
268  static void reportError(const cl::Error &err);
269  static void outputErrorMessage(const char *errMsg);
270  static void setErrorCB(CE_ErrorCB callback, void *data);
271  static void outputWarningMessage(const char *errMsg);
272 
273  static void initMainSharedGLContext( int devicetype, void* context, void* display );
274  static bool useHalfNormalDelayedBindBuffer();
275 
276  /// Sets the flag recording that an operation has run out of memory, allowing
277  /// us to report elsewhere. Pass hasfailed=false to clear the flag again.
278  void setOutOfMemoryFailure(bool hasfailed = true) { myOutOfMemoryFailure = hasfailed; }
279  bool hasOutOfMemoryFailureHappened() const { return myOutOfMemoryFailure; }
280 
281  /// This structure holds a device name, vendor, and device number with respect to
282  /// its vendor platform.
284  {
288  int number;
289  };
290  /// Get the vector of available devices of the given type.
291  static void getDevices(UT_Array<DeviceDescriptor>&, cl_device_type t);
292 
293  /// Get an index to the preferred/default device for the specified device
294  /// type and the list of available devices.
295  static int getDefaultDevice(
297 
298  /// Returns true if environment variables are set that override preferences.
299  static bool isEnvironmentOverride();
300 
301  // Queries the device by calling clGetDeviceInfo, but returning false and setting
302  // result to zero for unknown flags or flags that are disabled with environment
303  // variables.
304  template <class T>
305  static bool getDeviceInfoRestricted(cl_device_id device, cl_uint flag, T &result);
306 
307  /// Queries the current device given the specified flag using clGetDeviceInfo,
308  /// used by ocldeviceinfo EXPR function. Returns false for unrecognized flag.
309  bool getDeviceInfo(const char *flag, fpreal &result);
310 
311  /// This function returns the total size of addressable compute memory for
312  /// the current device.
313  size_t getAddressableMemory() const;
314 
315  /// Type of a pointer to a memory freeing function. Input is how much to
316  /// attempt to free, and return value is the actual amount freed.
317  typedef int64 (*FreeFunc)(int64);
318  /// Registers a function to be called for freeing memory on allocation
319  /// failure. Multiple such functions can be registered, and will be called
320  /// in registration order, until the allocation can be fulfilled.
321  /// Signature of the function is (int64)->(int64), where the input is how
322  /// much to attempt to free, and return value is the actual amount freed.
323  void registerMemFreeFunc(FreeFunc func);
324  /// Returns the list of registered memory freeing functions.
326  {
327  return myMemFreeFuncs;
328  }
329 
330  /// Report memory usage
331  void reportUsage(std::ostream &os) const;
332 
333 protected:
334  cl::Program *doCompileProgram(const char *progtext, const char *options,
335  bool recompile);
336 
337  /// Initialize the context for the given device.
338  void init(cl::Context &context, cl::Device &device);
339 
340  /// Releases the pinned, page-locked memory buffer.
341  void releasePinnedBuffer();
342 
343 
349  bool myIsValid;
353 
354  struct KernelInfo
355  {
358  };
359 
362 
364 
365  /// List of memory freeing functions (int64 -> int64). These are called upon
366  /// allocation failure.
368 
369  // The pinned buffer is unique to the main thread.
372 
374 
376 
377  static void* theGLContext;
378  static void* theGLDisplay;
379  static int theGLDeviceType;
380 };
381 
382 /// NOTE: this function will retry if it fails on allocation failure, after
383 /// freeing some memory.
385 ce_enqueueKernel(const cl::CommandQueue& queue, const cl::Kernel &kernel,
386  const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local,
387  const std::vector<cl::Event>* events,
388  cl::Event* event);
389 
390 #endif
391 #endif
392 
#define CE_API
Definition: CE_API.h:11
struct _cl_device_id * cl_device_id
Definition: cl.h:30
uint32_t cl_uint
Definition: cl_platform.h:261
bool has3DImageWriteSupport() const
Returns true if the OpenCL device supports writing to 3D image objects.
Definition: CE_Context.h:177
bool myIsValid
Definition: CE_Context.h:349
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2540
GLbitfield flags
Definition: glcorearb.h:1596
cl::Device getDevice() const
Returns the OpenCL Device object.
Definition: CE_Context.h:114
Unsorted map container.
Definition: UT_Map.h:107
*get result *(waiting if necessary)*A common idiom is to fire a bunch of sub tasks at the and then *wait for them to all complete We provide a helper class
Definition: thread.h:623
void
Definition: png.h:1083
GLboolean * data
Definition: glcorearb.h:131
bool myOutOfMemoryFailure
Definition: CE_Context.h:373
GLboolean GLboolean g
Definition: glcorearb.h:1222
UT_StringHolder platformVendor
Definition: CE_Context.h:287
cl::Kernel loadKernel(const char *progname, const UT_StringRef &kernelname, const char *options=NULL)
Definition: CE_Context.h:159
bool mySupports3DImageWrites
Definition: CE_Context.h:351
UT_ErrorSeverity
Definition: UT_Error.h:25
int32_t cl_int
Definition: cl_platform.h:260
uint8_t cl_uchar
Definition: cl_platform.h:257
ceTraceCtx getTraceContext() const
Definition: CE_Context.h:116
CE_API cl_int ce_enqueueKernel(const cl::CommandQueue &queue, const cl::Kernel &kernel, const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local, const std::vector< cl::Event > *events, cl::Event *event)
**But if you need a result
Definition: thread.h:613
virtual ~CE_DelayedOGLBindBuffer()
Definition: CE_Context.h:38
Definition: Image.h:45
float fpreal32
Definition: SYS_Types.h:200
void * ceTraceCtx
Definition: CE_Tracing.h:59
cl::CommandQueue getQueue() const
Definition: CE_Context.h:111
#define CL_UUID_SIZE_KHR
Definition: CE_Context.h:82
struct _cl_event * event
Definition: glcorearb.h:2961
fpreal32 * myPinnedData
Definition: CE_Context.h:371
Event interface for cl_event.
Definition: cl.hpp:1647
cl::CommandQueue myQueue
Definition: CE_Context.h:345
bool isValid() const
Returns whether the CE_Context has been successfully initialized.
Definition: CE_Context.h:164
GLintptr offset
Definition: glcorearb.h:665
UT_Map< uint32, CE_DelayedOGLBindBuffer * > myDelayedOGLBindBuffers
Definition: CE_Context.h:375
Definition: core.h:760
ceTraceCtx myTraceCtx
Definition: CE_Context.h:347
cl_bitfield cl_device_type
Definition: cl.h:42
cl::CommandQueue myDeviceQueue
Definition: CE_Context.h:346
GLuint writeBuffer
Definition: glcorearb.h:2674
GLint GLint GLsizei GLint GLenum format
Definition: glcorearb.h:108
CE_MemoryPool * myMemPool
Definition: CE_Context.h:363
#define UT_NON_COPYABLE(CLASS)
Define deleted copy constructor and assignment operator inside a class.
*get result *(waiting if necessary)*A common idiom is to fire a bunch of sub tasks at the queue
Definition: thread.h:623
long long int64
Definition: SYS_Types.h:116
cl::Device myDevice
Definition: CE_Context.h:348
static void * theGLDisplay
Definition: CE_Context.h:378
void setOutOfMemoryFailure(bool hasfailed=true)
Definition: CE_Context.h:278
GLenum GLenum severity
Definition: glcorearb.h:2539
#define SYS_UINT32_MAX
Definition: SYS_Types.h:172
GLdouble t
Definition: glad.h:2397
GLsizeiptr size
Definition: glcorearb.h:664
cl::Context myContext
Definition: CE_Context.h:344
CommandQueue interface for cl_command_queue.
Definition: cl.hpp:2850
static void * theGLContext
Definition: CE_Context.h:377
cl_int getInfo(Func f, cl_uint name, T *param)
Definition: cl.hpp:1030
GLenum func
Definition: glcorearb.h:783
UT_Array< FreeFunc > myMemFreeFuncs
Definition: CE_Context.h:367
fpreal64 fpreal
Definition: SYS_Types.h:277
const UT_Array< FreeFunc > & getMemFreeFuncs() const
Returns the list of registered memory freeing functions.
Definition: CE_Context.h:325
bool mySupportsDouble
Definition: CE_Context.h:350
Base class interface for all images.
Definition: cl.hpp:2098
unsigned int uint32
Definition: SYS_Types.h:40
Memory buffer interface.
Definition: cl.hpp:1867
cl::Buffer myXNoiseData
Definition: CE_Context.h:352
NDRange interface.
Definition: cl.hpp:2466
UT_StringHolder name
Definition: CE_Context.h:356
void(* CE_ErrorCB)(const char *errmsg, UT_ErrorSeverity severity, void *data)
Definition: CE_Context.h:31
Kernel interface that implements cl_kernel.
Definition: cl.hpp:2544
cl::Context getCLContext() const
Returns the underlying cl::Context object.
Definition: CE_Context.h:107
static int theGLDeviceType
Definition: CE_Context.h:379
UT_Map< const _cl_program *, UT_Array< KernelInfo > * > myKernelTable
Definition: CE_Context.h:361
Device interface for cl_device_id.
Definition: cl.hpp:1265
bool hasDoubleSupport() const
Returns true if the OpenCL device supports double precision.
Definition: CE_Context.h:175
Program interface that implements cl_program.
Definition: cl.hpp:2649
bool hasOutOfMemoryFailureHappened() const
Definition: CE_Context.h:279
unsigned int uint
Definition: SYS_Types.h:45
cl::Kernel * kernel
Definition: CE_Context.h:357
cl_bitfield cl_mem_flags
Definition: cl.h:66
Definition: format.h:895
cl::Buffer myPinnedBuffer
Definition: CE_Context.h:370
UT_StringMap< cl::Program * > myProgramTable
Definition: CE_Context.h:360