HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CudaUtils.h
Go to the documentation of this file.
1 // Copyright Contributors to the OpenVDB Project
2 // SPDX-License-Identifier: MPL-2.0
3 
4 #ifndef NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED
5 #define NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED
6 
7 #include <cuda.h>
8 #include <cuda_runtime_api.h>
9 
10 //#if defined(DEBUG) || defined(_DEBUG)
/// @brief Reports a failed CUDA call on stderr and optionally terminates the process.
/// @param code  Result code to inspect; nothing happens when it equals cudaSuccess.
/// @param file  Source file name included in the message.
/// @param line  Source line number included in the message.
/// @param abort If true (the default) the process exits with @c code as its exit status.
static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
{
    if (code == cudaSuccess) return;// fast path: the call succeeded

    const unsigned errorNumber = unsigned(code);
    const char*    errorText = cudaGetErrorString(code);
    fprintf(stderr, "CUDA error %u: %s (%s:%d)\n", errorNumber, errorText, file, line);

    if (abort) {
        exit(code);
    }
}
/// @brief Verifies that a pointer is non-null and aligned to NANOVDB_DATA_ALIGNMENT.
/// @param ptr   Pointer to validate.
/// @param msg   Caller supplied text included in the error message.
/// @param file  Source file name included in the error message.
/// @param line  Source line number included in the error message.
/// @param abort If true (the default) the process exits with status 1 after a failed check.
/// @note A diagnostic is written to stderr for a null pointer or a misaligned pointer;
///       a valid pointer produces no output.
static inline void ptrAssert(const void* ptr, const char* msg, const char* file, int line, bool abort = true)
{
    if (ptr != nullptr) {
        if ((uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) == 0) return;// non-null and properly aligned
        fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line);
    } else {
        fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
    }
    // Only reached after one of the two diagnostics above has been printed
    if (abort) exit(1);
}
29 //#else
30 // static inline void gpuAssert(cudaError_t, const char*, int, bool = true){}
31 // static inline void ptrAssert(void*, const char*, const char*, int, bool = true){}
32 //#endif
33 
// Convenience macro for checking the result of a CUDA runtime API call. It can be
// wrapped around any runtime API call and forwards the result, together with the
// file and line of the call site, to gpuAssert.
// Note: expands to a braced block, not a single statement.
#define cudaCheck(ans) { gpuAssert((ans), __FILE__, __LINE__); }
40 
// Validates a pointer with ptrAssert, passing along the file and line of the call site.
// Note: expands to a braced block, not a single statement.
#define checkPtr(ptr, msg) { ptrAssert((ptr), (msg), __FILE__, __LINE__); }
45 
// Calls cudaDeviceSynchronize and checks its result with cudaCheck.
#define cudaSync() { cudaCheck(cudaDeviceSynchronize()); }
50 
// Fetches the last CUDA error with cudaGetLastError and checks it with cudaCheck.
#define cudaCheckError() { cudaCheck(cudaGetLastError()); }
55 
56 #if CUDART_VERSION < 11020 // 11.2 introduced cudaMallocAsync and cudaFreeAsync
57 
/// @brief Stand-in for cudaMallocAsync that forwards the request to cudaMalloc
/// @param d_ptr Address of the pointer that receives the allocated device memory
/// @param size Number of bytes to allocate
/// @note The trailing (unnamed) stream argument is accepted for signature compatibility and is ignored
/// @return Cuda error code produced by cudaMalloc
inline cudaError_t cudaMallocAsync(void** d_ptr, size_t size, cudaStream_t)
{
    const cudaError_t status = cudaMalloc(d_ptr, size);
    return status;
}
64 
/// @brief Stand-in for cudaFreeAsync that forwards the request to cudaFree
/// @param d_ptr Device pointer that will be freed
/// @note The trailing (unnamed) stream argument is accepted for signature compatibility and is ignored
/// @return Cuda error code produced by cudaFree
inline cudaError_t cudaFreeAsync(void* d_ptr, cudaStream_t)
{
    const cudaError_t status = cudaFree(d_ptr);
    return status;
}
70 
71 #endif
72 
73 #if defined(__CUDACC__)// the following functions only run on the GPU!
74 
// --- Wrapper for launching lambda kernels
/// @brief Generic kernel that invokes a callable once for every work item.
/// @tparam Func Type of the callable, invoked as func(tid, args...)
/// @tparam Args Types of the extra arguments handed (by value) to @c func
/// @param numItems Total number of work items; threads whose index is >= numItems return without calling @c func
/// @param func Callable executed by each in-range thread
/// @param args Additional arguments passed to @c func after the thread index
/// @note Only the x-dimension of the grid and block is used and each thread handles a single
///       index, so the launch needs at least @c numItems threads in total.
template<typename Func, typename... Args>
__global__ void cudaLambdaKernel(const size_t numItems, Func func, Args... args)
{
    // Promote to size_t BEFORE multiplying: blockIdx.x * blockDim.x would otherwise be
    // evaluated in 32-bit unsigned arithmetic (and was previously narrowed to int), which
    // wraps or turns negative for large launches and breaks the comparison with numItems.
    const size_t tid = size_t(blockIdx.x) * blockDim.x + threadIdx.x;
    if (tid >= numItems) return;
    func(tid, args...);
}
83 
/// @brief Device-side string copy from @c src to @c dst, including the terminating null character.
/// @param dst pointer to the character array that receives the copy.
/// @param src pointer to the null-terminated character string to read from.
/// @return @c dst, i.e. the start of the character array that was written.
/// @note Emulates the behaviour of std::strcpy.
__device__ inline char* cudaStrcpy(char *dst, const char *src)
{
    char *out = dst;
    for (;;) {
        const char c = *src++;
        *out++ = c;
        if (c == '\0') break;// the terminator has been copied, so we are done
    }
    return dst;
}
95 
/// @brief Device-side concatenation: copies the string @c src (with its terminator)
///        onto the end of the string stored in @c dst.
/// @param dst pointer to the null-terminated byte string to append to.
/// @param src pointer to the null-terminated byte string to copy from.
/// @return @c dst, i.e. the start of the character array being appended to.
/// @note Emulates the behaviour of std::strcat.
__device__ inline char* cudaStrcat(char *dst, const char *src)
{
    char *end = dst;
    for (; *end != '\0'; ++end) {}// advance to the terminator of dst
    cudaStrcpy(end, src);// overwrite it with src
    return dst;
}
109 
/// @brief Device-side lexicographical comparison of two null-terminated byte strings.
/// @param lhs pointer to the first null-terminated byte string
/// @param rhs pointer to the second null-terminated byte string
/// @return Difference between the first pair of differing characters, both read as
///         unsigned char: negative if @c lhs orders before @c rhs, zero if the
///         strings are equal, positive if @c lhs orders after @c rhs.
__device__ inline int cudaStrcmp(const char *lhs, const char *rhs)
{
    const unsigned char *a = (const unsigned char*)lhs;
    const unsigned char *b = (const unsigned char*)rhs;
    // Walk both strings until lhs ends or the characters differ
    for (; *a != 0 && *a == *b; ++a, ++b) {}
    return *a - *b;// zero if lhs == rhs
}
124 
/// @brief Device-side equality test for two null-terminated byte strings
/// @param lhs pointer to the first null-terminated byte string
/// @param rhs pointer to the second null-terminated byte string
/// @return true if cudaStrcmp reports the two c-strings as equal
__device__ inline bool cudaStrEq(const char *lhs, const char *rhs)
{
    const int order = cudaStrcmp(lhs, rhs);
    return order == 0;
}
133 
134 #endif// __CUDACC__
135 
136 #endif// NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED
cudaError_t cudaFreeAsync(void *d_ptr, cudaStream_t)
Dummy implementation of cudaFreeAsync that calls cudaFree.
Definition: CudaUtils.h:69
#define __global__
Definition: NanoVDB.h:216
#define NANOVDB_DATA_ALIGNMENT
Definition: NanoVDB.h:154
cudaError_t cudaMallocAsync(void **d_ptr, size_t size, cudaStream_t)
Dummy implementation of cudaMallocAsync that calls cudaMalloc.
Definition: CudaUtils.h:63
#define __device__
Definition: NanoVDB.h:219
struct CUstream_st * cudaStream_t
Definition: oidn.h:24
auto fprintf(std::FILE *f, const S &fmt, const T &...args) -> int
Definition: printf.h:602
GLsizeiptr size
Definition: glcorearb.h:664
GLenum GLenum dst
Definition: glcorearb.h:1793
GLenum func
Definition: glcorearb.h:783
auto ptr(T p) -> const void *
Definition: format.h:2448
**If you just want to fire and args
Definition: thread.h:609
GLenum src
Definition: glcorearb.h:1793