tensorrt_provider_options.h
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

/// <summary>
/// Options for the TensorRT provider that are passed to SessionOptionsAppendExecutionProvider_TensorRT_V2.
/// Please note that this struct is *similar* to OrtTensorRTProviderOptions but is only to be used internally.
/// Going forward, new TRT provider options are to be supported via this struct, and usage of the publicly defined
/// OrtTensorRTProviderOptions will be deprecated over time.
/// Users can only obtain an instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions
/// (see the usage sketch after the struct definition below).
/// </summary>
struct OrtTensorRTProviderOptionsV2 {
  OrtTensorRTProviderOptionsV2& operator=(const OrtTensorRTProviderOptionsV2& other);  // copy assignment operator

  int device_id{0};  // cuda device id.
  int has_user_compute_stream{0};  // indicator of user specified CUDA compute stream.
  void* user_compute_stream{nullptr};  // user specified CUDA compute stream.
  int trt_max_partition_iterations{1000};  // maximum iterations for TensorRT parser to get capability
  int trt_min_subgraph_size{1};  // minimum size of TensorRT subgraphs
  size_t trt_max_workspace_size{1 << 30};  // maximum workspace size for TensorRT.
  int trt_fp16_enable{0};  // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
  int trt_int8_enable{0};  // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
  const char* trt_int8_calibration_table_name{nullptr};  // TensorRT INT8 calibration table name.
  int trt_int8_use_native_calibration_table{0};  // use native TensorRT generated calibration table. Default 0 = false, nonzero = true
  int trt_dla_enable{0};  // enable DLA. Default 0 = false, nonzero = true
  int trt_dla_core{0};  // DLA core number. Default 0
  int trt_dump_subgraphs{0};  // dump TRT subgraph. Default 0 = false, nonzero = true
  int trt_engine_cache_enable{0};  // enable engine caching. Default 0 = false, nonzero = true
  const char* trt_engine_cache_path{nullptr};  // specify engine cache path, defaults to the working directory
  int trt_engine_decryption_enable{0};  // enable engine decryption. Default 0 = false, nonzero = true
  const char* trt_engine_decryption_lib_path{nullptr};  // specify engine decryption library path
  int trt_force_sequential_engine_build{0};  // force building TensorRT engine sequentially. Default 0 = false, nonzero = true
  int trt_context_memory_sharing_enable{0};  // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true
  int trt_layer_norm_fp32_fallback{0};  // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true
  int trt_timing_cache_enable{0};  // enable TensorRT timing cache. Default 0 = false, nonzero = true
  const char* trt_timing_cache_path{nullptr};  // specify timing cache path, if none is provided the trt_engine_cache_path is used
  int trt_force_timing_cache{0};  // force the TensorRT timing cache to be used even if the device profile does not match. Default 0 = false, nonzero = true
  int trt_detailed_build_log{0};  // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true
  int trt_build_heuristics_enable{0};  // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true
  int trt_sparsity_enable{0};  // Control if sparsity can be used by TRT. Default 0 = false, 1 = true
  int trt_builder_optimization_level{3};  // Set the builder optimization level. WARNING: levels below 3 do not guarantee good engine performance, but greatly improve build time. Default 3, valid range [0-5]
  int trt_auxiliary_streams{-1};  // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics
  const char* trt_tactic_sources{nullptr};  // specify the tactics to be used by adding (+) or removing (-) tactics from the default
                                            // tactic sources (default = all available tactics) e.g. "-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS"
  const char* trt_extra_plugin_lib_paths{nullptr};  // specify extra TensorRT plugin library paths
  const char* trt_profile_min_shapes{nullptr};  // Specify the minimum range of the input shapes to build the engine with
  const char* trt_profile_max_shapes{nullptr};  // Specify the maximum range of the input shapes to build the engine with
  const char* trt_profile_opt_shapes{nullptr};  // Specify the optimal range of the input shapes to build the engine with
  int trt_cuda_graph_enable{0};  // Enable CUDA graph in ORT TRT

  /*
   * Please note that there are rules for using the following context-model-related provider options:
   *
   * 1. When dumping the context model and when loading the context model,
   *    for security reasons, the TRT EP does not allow the "ep_cache_context" node attribute of the EP context node
   *    to be an absolute path, or a relative path that points outside of the context model directory.
   *    This means the engine cache needs to be in the same directory as, or a sub-directory of, the context model.
   *
   * 2. When dumping the context model, the engine cache path is changed to be relative to the context model directory.
   *    For example, if "trt_dump_ep_context_model" and "trt_engine_cache_enable" are both enabled
   *    and "trt_ep_context_file_path" is "./context_model_dir":
   *    - if "trt_engine_cache_path" is "" -> the engine cache will be saved to "./context_model_dir"
   *    - if "trt_engine_cache_path" is "engine_dir" -> the engine cache will be saved to "./context_model_dir/engine_dir"
   *
   * (A configuration sketch illustrating these rules follows the struct definition below.)
   */
  int trt_dump_ep_context_model{0};  // Dump EP context node model
  const char* trt_ep_context_file_path{nullptr};  // Specify file name to dump EP context node model. Can be a path or a file name or a file name with path.
  int trt_ep_context_embed_mode{0};  // Specify EP context embed mode. Default 0 = context is engine cache path, 1 = context is engine binary data

  const char* trt_engine_cache_prefix{nullptr};  // specify engine cache prefix
};
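
As the summary above notes, an OrtTensorRTProviderOptionsV2 instance is obtained and configured through the ONNX Runtime C API rather than constructed directly. The sketch below is illustrative and not part of the header: it creates the options, sets a few fields via string key/value pairs whose keys mirror the struct field names, attaches the TensorRT EP to the session options, and releases everything. Error handling is reduced to abort-on-failure, and the option values shown are assumptions for the example.

#include <onnxruntime_c_api.h>

#include <cstdlib>

int main() {
  const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION);

  // Options can only be created through the C API; direct construction is not supported.
  OrtTensorRTProviderOptionsV2* trt_options = nullptr;
  if (api->CreateTensorRTProviderOptions(&trt_options) != nullptr) std::abort();

  // Fields are updated as string key/value pairs; the keys mirror the struct field names.
  const char* keys[] = {"device_id", "trt_fp16_enable", "trt_engine_cache_enable", "trt_engine_cache_path"};
  const char* values[] = {"0", "1", "1", "trt_engines"};
  if (api->UpdateTensorRTProviderOptions(trt_options, keys, values, 4) != nullptr) std::abort();

  // Attach the TensorRT EP to the session options.
  OrtSessionOptions* session_options = nullptr;
  if (api->CreateSessionOptions(&session_options) != nullptr) std::abort();
  if (api->SessionOptionsAppendExecutionProvider_TensorRT_V2(session_options, trt_options) != nullptr) std::abort();

  // ... create an OrtSession with these session options and run inference ...

  api->ReleaseSessionOptions(session_options);
  api->ReleaseTensorRTProviderOptions(trt_options);
  return 0;
}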
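
The EP context rules documented inside the struct can likewise be exercised through key/value updates. The fragment below is a sketch that reuses the api and trt_options handles from the example above; the paths are hypothetical, and, per rule 2, the engine cache would then be written under "./context_model_dir/engine_dir".

  // Dump an EP context model and keep the engine cache inside the context model directory.
  const char* ctx_keys[] = {"trt_engine_cache_enable", "trt_dump_ep_context_model",
                            "trt_ep_context_file_path", "trt_engine_cache_path"};
  const char* ctx_values[] = {"1", "1", "./context_model_dir", "engine_dir"};
  // Per rule 2 above, the engine cache ends up under "./context_model_dir/engine_dir".
  if (api->UpdateTensorRTProviderOptions(trt_options, ctx_keys, ctx_values, 4) != nullptr) std::abort();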