18 #include <cuda_runtime.h>
19 #include <cublas_v2.h>
40 void Init(
const OrtKernelContext& kernel_ctx) {
58 if (
sizeof(
T) >
sizeof(
void*)) {
63 OrtStatus* status = ort_api.KernelContext_GetResource(&kernel_ctx,
ORT_CUDA_RESOUCE_VERSION, resource_type, &resource);
68 memcpy(&t, &resource,
sizeof(
T));
80 ORT_CXX_API_THROW(
"failed to allocate deferred cpu memory", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
90 ORT_CXX_API_THROW(
"failed to free deferred cpu memory", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
#define ORT_CUDA_RESOUCE_VERSION
int32_t arena_extend_strategy
OrtAllocator * deferred_cpu_allocator
bool enable_skip_layer_norm_strict_mode
void FreeDeferredCpuMem(void *mem) const
const OrtApi & GetApi() noexcept
This returns a reference to the OrtApi interface in use.
void Init(const OrtKernelContext &kernel_ctx)
void * AllocDeferredCpuMem(size_t size) const
T FetchResource(const OrtKernelContext &kernel_ctx, CudaResource resource_type)
cudnnHandle_t cudnn_handle
struct CUstream_st * cudaStream_t
int32_t cudnn_conv_algo_search
bool cudnn_conv_use_max_workspace
bool cudnn_conv1d_pad_to_nc1d
cublasHandle_t cublas_handle
#define ORT_CXX_API_THROW(string, code)