HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
cuda_provider_options.h
Go to the documentation of this file.
1 // Copyright (c) Microsoft Corporation. All rights reserved.
2 // Copyright (c) 2023 NVIDIA Corporation.
3 // Licensed under the MIT License.
4 
5 #pragma once
6 
7 #include <limits>
8 
9 #include "onnxruntime_c_api.h"
10 #include "core/framework/arena_extend_strategy.h"
11 
12 /// <summary>
13 /// Options for the CUDA provider that are passed to SessionOptionsAppendExecutionProvider_CUDA_V2.
14 /// Please note that this struct is *similar* to OrtCUDAProviderOptions but only to be used internally.
15 /// Going forward, new cuda provider options are to be supported via this struct and usage of the publicly defined
16 /// OrtCUDAProviderOptions will be deprecated over time.
17 /// User can only get the instance of OrtCUDAProviderOptionsV2 via CreateCUDAProviderOptions.
18 /// </summary>
20  int device_id = 0; // cuda device id.
21  int has_user_compute_stream = 0; // indicator of user specified CUDA compute stream.
22  void* user_compute_stream = nullptr; // user specified CUDA compute stream.
23  int do_copy_in_default_stream = 1; // flag specifying if the default stream is to be used for copying.
24  OrtCudnnConvAlgoSearch cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive; // cudnn algo search enum.
25  size_t gpu_mem_limit = std::numeric_limits<size_t>::max(); // BFC Arena memory limit for CUDA.
26  // (will be overridden by contents of `default_memory_arena_cfg` is it exists)
27  onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; // BFC Arena extension strategy.
28  // (will be overridden by contents of `default_memory_arena_cfg` is it exists)
29  OrtArenaCfg* default_memory_arena_cfg = nullptr; // BFC Arena config flags.
30  int cudnn_conv_use_max_workspace = 1; // flag specifying if maximum workspace can be used in cudnn conv algo search.
31  int enable_cuda_graph = 0; // flag specifying if the CUDA graph is to be captured for the model.
32  int cudnn_conv1d_pad_to_nc1d = 0; // flag specifying if pad Conv1D's input [N,C,D] to [N,C,1,D] or [N,C,D,1].
33  int tunable_op_enable = 0; // flag specifying if TunableOp is enabled.
34  int tunable_op_tuning_enable = 0; // flag specifying if TunableOp is enabled for tuning, this relies on TunableOp is enabled.
35  int tunable_op_max_tuning_duration_ms = 0; // Max tuning duration time limit for TunableOp.
36  int enable_skip_layer_norm_strict_mode = 0; // flag specifying if SkipLayerNorm is in strict mode. If true, use LayerNormalization kernel.
37  // The strict mode has better accuracy but lower performance.
38  int prefer_nhwc = 0; // make the CUDA EP NHWC preferred
39  int use_ep_level_unified_stream = 0; // flag specifying if ep level stream is used or not
40 };
OrtCudnnConvAlgoSearch cudnn_conv_algo_search
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
Options for the CUDA provider that are passed to SessionOptionsAppendExecutionProvider_CUDA_V2. Please note that this struct is *similar* to OrtCUDAProviderOptions but is only to be used internally. Going forward, new CUDA provider options are to be supported via this struct, and usage of the publicly defined OrtCUDAProviderOptions will be deprecated over time. Users can only obtain an instance of OrtCUDAProviderOptionsV2 via CreateCUDAProviderOptions.
onnxruntime::ArenaExtendStrategy arena_extend_strategy