docs/hdk/_cuda_utils_8h_source.html

 // Copyright Contributors to the OpenVDB Project

 // SPDX-License-Identifier: MPL-2.0


 #ifndef NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED

 #define NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED


 #include <cuda.h>

 #include <cuda_runtime_api.h>


 //#if defined(DEBUG) || defined(_DEBUG)

     /// @brief Report a failed CUDA runtime call and optionally terminate the process.
     /// @param code  Result returned by a CUDA runtime API call
     /// @param file  Name of the source file to include in the message
     /// @param line  Line number to include in the message
     /// @param abort If true (default), call exit() with @c code as the exit status after printing
     /// @note Does nothing when @c code equals cudaSuccess.
     static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
     {
         // Successful calls fall straight through without any output.
         if (code == cudaSuccess) return;

         const unsigned    errorNumber = unsigned(code);
         const char* const description = cudaGetErrorString(code);
         fprintf(stderr, "CUDA error %u: %s (%s:%d)\n", errorNumber, description, file, line);

         if (abort) {
             exit(code);
         }
     }

     /// @brief Verify that a pointer is non-null and aligned to NANOVDB_DATA_ALIGNMENT.
     /// @param ptr   Pointer to validate
     /// @param msg   Caller-supplied text included in the error message
     /// @param file  Name of the source file to include in the message
     /// @param line  Line number to include in the message
     /// @param abort If true (default), call exit(1) after printing an error
     /// @note A valid pointer produces no output and never exits.
     static inline void ptrAssert(const void* ptr, const char* msg, const char* file, int line, bool abort = true)
     {
         const bool isNull = (ptr == nullptr);
         // Alignment is only examined for non-null pointers.
         const bool isMisaligned = !isNull && (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) != 0;

         if (!isNull && !isMisaligned) return;

         if (isNull) {
             fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
         } else {
             fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line);
         }

         // Both failure modes share the same exit status.
         if (abort) exit(1);
     }

 //#else

 //    static inline void gpuAssert(cudaError_t, const char*, int, bool = true){}

 //    static inline void ptrAssert(void*, const char*, const char*, int, bool = true){}

 //#endif


 // Convenience macro for checking CUDA runtime API results;
 // it can be wrapped around any runtime API call. The check is currently always
 // active, because the DEBUG-only guard around gpuAssert is commented out.

 // Passes the result `ans` of a CUDA runtime API call to gpuAssert together with
 // the file name and line number of the invocation site.
 // The continuation lines must stay contiguous: a blank line after a trailing
 // backslash ends the macro definition.
 // NOTE: expands to a braced block, so `cudaCheck(x);` is a block followed by an
 // empty statement; enclose it in braces when it is the body of an if/else.
 #define cudaCheck(ans) \
     { \
         gpuAssert((ans), __FILE__, __LINE__); \
     }


 // Passes `ptr` and the message `msg` to ptrAssert together with the file name
 // and line number of the invocation site.
 // The continuation lines must stay contiguous: a blank line after a trailing
 // backslash ends the macro definition.
 #define checkPtr(ptr, msg) \
     { \
         ptrAssert((ptr), (msg), __FILE__, __LINE__); \
     }


 // Calls cudaDeviceSynchronize() and checks its result with cudaCheck.
 // The continuation lines must stay contiguous: a blank line after a trailing
 // backslash ends the macro definition.
 #define cudaSync() \
     { \
         cudaCheck(cudaDeviceSynchronize()); \
     }


 // Calls cudaGetLastError() and checks its result with cudaCheck.
 // The continuation lines must stay contiguous: a blank line after a trailing
 // backslash ends the macro definition.
 #define cudaCheckError() \
     { \
         cudaCheck(cudaGetLastError()); \
     }


 #if CUDART_VERSION < 11020  // 11.2 introduced cudaMallocAsync and cudaFreeAsync


 /// @brief Stand-in for cudaMallocAsync that forwards to cudaMalloc.
 /// @details Only compiled when CUDART_VERSION is below 11020, i.e. before the
 ///          runtime provided cudaMallocAsync itself.
 /// @param d_ptr Address of the pointer that receives the allocated device memory
 /// @param size  Number of bytes to allocate
 /// @note The third (stream) parameter is unnamed and ignored.
 /// @return Cuda error code returned by cudaMalloc
 inline cudaError_t cudaMallocAsync(void** d_ptr, size_t size, cudaStream_t)
 {
     const cudaError_t status = cudaMalloc(d_ptr, size);
     return status;
 }


 /// @brief Stand-in for cudaFreeAsync that forwards to cudaFree.
 /// @details Only compiled when CUDART_VERSION is below 11020, i.e. before the
 ///          runtime provided cudaFreeAsync itself.
 /// @param d_ptr Device pointer that will be freed
 /// @note The second (stream) parameter is unnamed and ignored.
 /// @return Cuda error code returned by cudaFree
 inline cudaError_t cudaFreeAsync(void* d_ptr, cudaStream_t)
 {
     const cudaError_t status = cudaFree(d_ptr);
     return status;
 }


 #endif


 #if defined(__CUDACC__)// the following functions only run on the GPU!


 // --- Wrapper for launching lambda kernels
 /// @brief Kernel that calls @c func once for every item index below @c numItems.
 /// @tparam Func Callable invoked on the device as func(index, args...)
 /// @tparam Args Types of the extra arguments forwarded to @c func
 /// @param numItems Number of items; threads whose index is not below this return immediately
 /// @param func     Callable to invoke with the item index followed by @c args
 /// @param args     Extra arguments passed by value to every invocation of @c func
 /// @note Only the x-dimension of the launch configuration is used to form the index.
 /// @note The index is computed and passed to @c func as size_t. The built-in
 ///       blockIdx.x and blockDim.x are 32-bit unsigned, so their product is
 ///       widened before multiplying to avoid wrap-around on large launches.
 template<typename Func, typename... Args>
 __global__ void cudaLambdaKernel(const size_t numItems, Func func, Args... args)
 {
     const size_t tid = size_t(blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= numItems) return;// surplus threads in the last block do nothing
     func(tid, args...);
 }


 /// @brief Device-side copy of a null-terminated string, terminator included.
 /// @param dst pointer to the character array that receives the copy.
 /// @param src pointer to the null-terminated character string to read from.
 /// @return @c dst, i.e. the start of the array that was written.
 /// @note Mirrors std::strcpy; no bounds checking is performed on @c dst.
 __device__ inline char* cudaStrcpy(char *dst, const char *src)
 {
     char *out = dst;
     for (;;) {
         const char c = *src++;
         *out++ = c;
         if (c == '\0') break;// the terminator has just been written
     }
     return dst;
 }


 /// @brief Device-side concatenation: appends the string @c src to the end of
 ///        the string already stored in @c dst.
 /// @param dst pointer to the null-terminated byte string that is extended.
 /// @param src pointer to the null-terminated byte string that is appended.
 /// @return @c dst, i.e. the start of the extended string.
 /// @note Mirrors std::strcat; no bounds checking is performed on @c dst.
 __device__ inline char* cudaStrcat(char *dst, const char *src)
 {
     // Locate the terminator of the existing string ...
     char *tail = dst;
     for (; *tail != '\0'; ++tail) {}
     // ... and overwrite it with a copy of src (terminator included).
     cudaStrcpy(tail, src);
     return dst;
 }


 /// @brief Device-side lexicographical comparison of two null-terminated byte strings.
 /// @param lhs pointer to the first null-terminated byte string
 /// @param rhs pointer to the second null-terminated byte string
 /// @return Difference between the first pair of differing bytes, both read as
 ///         unsigned char: negative if @c lhs orders before @c rhs, zero if the
 ///         strings are equal, positive if @c lhs orders after @c rhs.
 __device__ inline int cudaStrcmp(const char *lhs, const char *rhs)
 {
     const unsigned char *a = reinterpret_cast<const unsigned char*>(lhs);
     const unsigned char *b = reinterpret_cast<const unsigned char*>(rhs);
     // Advance while the bytes match and the end of lhs has not been reached.
     for (; *a != 0 && *a == *b; ++a, ++b) {}
     return int(*a) - int(*b);
 }


 /// @brief Device-side equality test for two null-terminated byte strings.
 /// @param lhs pointer to the first null-terminated byte string
 /// @param rhs pointer to the second null-terminated byte string
 /// @return true if cudaStrcmp reports no difference between the two c-strings
 __device__ inline bool cudaStrEq(const char *lhs, const char *rhs)
 {
     const int order = cudaStrcmp(lhs, rhs);
     return order == 0;
 }


 #endif// __CUDACC__


 #endif// NANOVDB_CUDA_UTILS_H_HAS_BEEN_INCLUDED

cudaFreeAsync
cudaError_t cudaFreeAsync(void *d_ptr, cudaStream_t)
Dummy implementation of cudaFreeAsync that calls cudaFree.
Definition: CudaUtils.h:69

__global__
#define __global__
Definition: NanoVDB.h:216

NANOVDB_DATA_ALIGNMENT
#define NANOVDB_DATA_ALIGNMENT
Definition: NanoVDB.h:154

cudaMallocAsync
cudaError_t cudaMallocAsync(void **d_ptr, size_t size, cudaStream_t)
Dummy implementation of cudaMallocAsync that calls cudaMalloc.
Definition: CudaUtils.h:63

__device__
#define __device__
Definition: NanoVDB.h:219

cudaStream_t
struct CUstream_st * cudaStream_t
Definition: oidn.h:24

fprintf
auto fprintf(std::FILE *f, const S &fmt, const T &...args) -> int
Definition: printf.h:602

size
GLsizeiptr size
Definition: glcorearb.h:664

dst
GLenum GLenum dst
Definition: glcorearb.h:1793

func
GLenum func
Definition: glcorearb.h:783

ptr
auto ptr(T p) -> const void *
Definition: format.h:2448

args
**If you just want to fire and args
Definition: thread.h:609

src
GLenum src
Definition: glcorearb.h:1793