HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
allocator.h
Go to the documentation of this file.
1 // Copyright (c) Microsoft Corporation. All rights reserved.
2 // Licensed under the MIT License.
3 
4 #pragma once
5 
6 #include <map>
7 
8 #include "core/common/common.h"
9 #include "core/framework/allocator_stats.h"
10 // some enums are defined in session/onnxruntime_c_api.h but used in ortdevice.h/ortmemory.h
11 #include "core/session/onnxruntime_c_api.h"
14 
15 // This configures the arena based allocator used by ORT
16 // See docs/C_API.md for details on what these mean and how to choose these values
17 struct OrtArenaCfg {
27  : max_mem(max_mem),
28  arena_extend_strategy(arena_extend_strategy),
29  initial_chunk_size_bytes(initial_chunk_size_bytes),
30  max_dead_bytes_per_chunk(max_dead_bytes_per_chunk),
31  initial_growth_chunk_size_bytes(initial_growth_chunk_size_bytes),
32  max_power_of_two_extend_bytes(max_power_of_two_extend_bytes) {}
33 
34  size_t max_mem; // use 0 to allow ORT to choose the default
35  int arena_extend_strategy; // use -1 to allow ORT to choose the default, 0 = kNextPowerOfTwo, 1 = kSameAsRequested
36  int initial_chunk_size_bytes; // use -1 to allow ORT to choose the default
37  int max_dead_bytes_per_chunk; // use -1 to allow ORT to choose the default
38  int initial_growth_chunk_size_bytes; // use -1 to allow ORT to choose the default
39  int64_t max_power_of_two_extend_bytes; // use -1 to allow ORT to choose the default
40 };
41 
42 namespace onnxruntime {
// Well-known name strings for allocators / memory locations.
// Declared `inline` (C++17) so that every translation unit including this header refers to
// one shared entity rather than getting its own internal-linkage copy.
inline constexpr const char* CPU = "Cpu";
inline constexpr const char* CUDA = "Cuda";
inline constexpr const char* CUDA_PINNED = "CudaPinned";
inline constexpr const char* CANN = "Cann";
inline constexpr const char* CANN_PINNED = "CannPinned";
inline constexpr const char* DML = "DML";
inline constexpr const char* HIP = "Hip";
inline constexpr const char* HIP_PINNED = "HipPinned";
inline constexpr const char* OpenVINO_CPU = "OpenVINO_CPU";
inline constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
inline constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer";

// NOTE(review): presumably the default allocation alignment in bytes - confirm at the use sites.
inline constexpr size_t kAllocAlignment = 256;
56 
// Forward declarations; the full definitions are not needed by the declarations below.
class IAllocator;
class Stream;
namespace synchronize { class Notification; }

// Callback taking the stream and the notification to wait on.
using WaitNotificationFn = std::function<void(Stream&, synchronize::Notification&)>;

// Allocates `size` bytes from `allocator`. Only declared here; the definition lives elsewhere.
// IAllocator::MakeUniquePtr forwards its use_reserve / stream / wait_fn arguments to this function.
void* AllocateBufferWithOptions(IAllocator& allocator,
                                size_t size,
                                bool use_reserve,
                                Stream* stream,
                                WaitNotificationFn wait_fn);

// std::unique_ptr whose deleter is type-erased, so the deleter can capture the allocator that owns the memory.
template <class T>
using IAllocatorUniquePtr = std::unique_ptr<T, std::function<void(T*)>>;
67 
68 class IAllocator {
69  public:
70  IAllocator(const OrtMemoryInfo& info) : memory_info_(info) {}
71  virtual ~IAllocator() = default;
72  /**
73  * Allocate memory of the specified size.
74  * If size is 0, nullptr is returned.
75  * If allocation fails, an exception is thrown.
76  *
77  * @remarks Use SafeInt when calculating the size of memory to allocate using Alloc.
78  */
79  virtual void* Alloc(size_t size) = 0;
80 
81  virtual void Free(void* p) = 0;
82 
83  // TODO: Find a better name than Reserve() and update in all places.
84  // Reserve() is an interface exposed for an implementation of IAllocator
85  // to optionally implement some allocation logic that by-passes any arena-based
86  // logic that may be housed in the Alloc() implementation.
87  // There are SessionOptions config(s) that allow users to allocate some memory
88  // by-passing arena-based logic.
89  // By default, the base implementation just calls Alloc().
90  virtual void* Reserve(size_t size) { return Alloc(size); }
91 
92  const OrtMemoryInfo& Info() const { return memory_info_; };
93 
94  // Each implementation of IAllocator can override and provide their own implementation
95  virtual void GetStats(AllocatorStats* /*stats*/) { return; }
96 
97  static bool CalcMemSizeForArray(size_t nmemb, size_t size, size_t* out) noexcept {
98  return CalcMemSizeForArrayWithAlignment(nmemb, size, 0, out);
99  }
100 
101  /**
102  * Calculate the memory size for an array. The size is bounds checked using SafeInt.
103  * \tparam alignment must be power of 2
104  * \param nmemb Number of members or elements in the array
105  * \param size Size of each element
106  * \param out Total size required after any alignment is applied
107  * \return true, successful. false, overflow
108  */
109  [[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t alignment,
110  size_t* out) noexcept;
111 
112  /**
113  * https://cwe.mitre.org/data/definitions/190.html
114  * \param alignment must be power of 2
115  * \param nmemb Number of members or elements in the array
116  * \param size Size of each element
117  * \param out Total size required after any alignment is applied
118  * \return true, successful. false, overflow
119  * \remarks This was the original API and was implemented in the header. Replaced with the above version
120  * implemented in the .cc file so that the SafeInt dependency is internal.
121  */
122  template <size_t alignment>
123  [[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t* out) noexcept;
124 
125  /**
126  * allocate memory for an array which has nmemb items of data, each size bytes long
127  */
128  void* AllocArray(size_t nmemb, size_t size) {
129  size_t len;
130  if (!CalcMemSizeForArray(nmemb, size, &len)) {
131  ORT_THROW("Invalid size requested for allocation: ", nmemb, " * ", size);
132  }
133 
134  return Alloc(len);
135  }
136 
137  /**
138  * allocate memory for an array which has nmemb items of data, each size bytes long
139  */
140  template <size_t alignment>
141  void* AllocArrayWithAlignment(size_t nmemb, size_t size) {
142  size_t len;
143  if (!CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, &len)) {
144  ORT_THROW("Invalid size requested for allocation: ", nmemb, " * ", size, " with alignment ", alignment);
145  }
146 
147  return Alloc(len);
148  }
149 
150  /**
151  Create a std::unique_ptr that is allocated and freed by the provided IAllocator.
152  @param allocator The allocator.
153  @param count_or_bytes The exact bytes to allocate if T is void, otherwise the number of elements to allocate.
154  @param use_reserve If true, call Reserve() instead of Alloc() to allocate memory.
155  @param stream Which stream instance allocated chunk will be used with.
156  @param wait_fn If the allocator want to dynamic reuse a chunk from another stream, use this wait_fn to sync on
157  the target stream to make the reuse safe.
158  @returns std::unique_ptr with allocated memory and deleter. Throws if it cannot allocate memory.
159  */
160  template <typename T>
161  static IAllocatorUniquePtr<T> MakeUniquePtr(std::shared_ptr<IAllocator> allocator, size_t count_or_bytes,
162  bool use_reserve = false,
163  Stream* stream = nullptr, WaitNotificationFn wait_fn = nullptr) {
164  ValidateAllocator(allocator);
165 
166  // for now limit to fundamental types. we could support others, but to do so either we or the caller
167  // needs to call the dtor for the objects, for buffers allocated on device we don't have destructor
168  // static_assert(std::is_fundamental<T>::value, "Fundamental type required as no destructors are called.");
169 
170  size_t alloc_size = count_or_bytes;
171 
172  // if T is not void, 'count_or_bytes' == number of items so allow for that
173  if constexpr (!std::is_void<T>::value) {
174  // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't
175  // reachable if T is void. use std::conditional to 'use' void* in the sizeof call
176  constexpr auto size = sizeof(typename std::conditional<std::is_void<T>::value, void*, T>::type);
177  alloc_size = ValidatedCalcMemSizeForArray(count_or_bytes, size);
178  }
179 
180  // allocate
181  T* p = static_cast<T*>(AllocateBufferWithOptions(*allocator, alloc_size, use_reserve, stream, std::move(wait_fn)));
182  ValidateAllocation(p, alloc_size);
183 
184  return IAllocatorUniquePtr<T>{p,
185  [allocator = std::move(allocator)](T* p) {
186  allocator->Free(p);
187  }};
188  }
189 
190  /**
191  Create a std::unique_ptr that is allocated and freed by the provided OrtAllocator.
192  @param ort_allocator The allocator.
193  @param count_or_bytes The exact bytes to allocate if T is void, otherwise the number of elements to allocate.
194  @returns std::unique_ptr with allocated memory and deleter. Throws if it cannot allocate memory.
195  */
196  template <typename T>
197  static IAllocatorUniquePtr<T> MakeUniquePtrFromOrtAllocator(OrtAllocator* ort_allocator, size_t count_or_bytes) {
198  ValidateAllocator(ort_allocator);
199 
200  size_t alloc_size = count_or_bytes;
201  // if T is not void, 'count_or_bytes' == number of items so allow for that
202  if constexpr (!std::is_void<T>::value) {
203  // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't
204  // reachable if T is void. use std::conditional to 'use' void* in the sizeof call
205  constexpr auto size = sizeof(typename std::conditional<std::is_void<T>::value, void*, T>::type);
206  alloc_size = ValidatedCalcMemSizeForArray(count_or_bytes, size);
207  }
208 
209  T* p = static_cast<T*>(ort_allocator->Alloc(ort_allocator, alloc_size));
210  ValidateAllocation(p, alloc_size);
211 
212  return IAllocatorUniquePtr<T>{p,
213  [ort_allocator](T* p) {
214  ort_allocator->Free(ort_allocator, p);
215  }};
216  }
217 
218  private:
219  //
220  // validation functions. split out from methods that are templatized on the data type to minimize binary size.
221  //
222 
223  template <typename T>
224  static void ValidateAllocator(const T& allocator) {
225  ORT_ENFORCE(allocator != nullptr);
226  }
227 
228  static size_t ValidatedCalcMemSizeForArray(size_t count, size_t size) {
229  size_t alloc_size = 0;
230  if (!CalcMemSizeForArray(count, size, &alloc_size)) {
231  ORT_THROW("Invalid size requested for allocation: ", count, " * ", size);
232  }
233 
234  return alloc_size;
235  }
236 
237  static void ValidateAllocation(void* p, size_t size) {
238  // allocator should throw directly but in case it didn't ensure we do here so that calling code doesn't
239  // need to check for nullptr when an actual allocation was expected.
240  ORT_ENFORCE(p != nullptr || size == 0, "Memory allocation failed. Size=", size);
241  };
242 
243  OrtMemoryInfo memory_info_;
244 };
245 
246 template <size_t alignment>
247 bool IAllocator::CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t* out) noexcept {
248  return CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, out);
249 }
250 
251 class CPUAllocator : public IAllocator {
252  public:
253  explicit CPUAllocator(const OrtMemoryInfo& memory_info) : IAllocator(memory_info) {}
254 
255  CPUAllocator() : IAllocator(OrtMemoryInfo(CPU, OrtAllocatorType::OrtDeviceAllocator)) {}
256 
257  void* Alloc(size_t size) override;
258  void Free(void* p) override;
259 };
260 
261 using AllocatorPtr = std::shared_ptr<IAllocator>;
262 using AllocatorMap = std::map<OrtDevice, AllocatorPtr>;
263 
264 void* AllocatorDefaultAlloc(size_t size);
265 void AllocatorDefaultFree(void* p);
266 } // namespace onnxruntime
GLuint GLuint stream
Definition: glcorearb.h:1832
virtual void Free(void *p)=0
constexpr const char * WEBGPU_BUFFER
Definition: allocator.h:53
void * AllocatorDefaultAlloc(size_t size)
constexpr const char * OpenVINO_GPU
Definition: allocator.h:52
size_t max_mem
Definition: allocator.h:34
constexpr const char * CPU
Definition: allocator.h:43
virtual void GetStats(AllocatorStats *)
Definition: allocator.h:95
constexpr size_t kAllocAlignment
Definition: allocator.h:55
constexpr const char * CUDA_PINNED
Definition: allocator.h:45
GLsizei const GLfloat * value
Definition: glcorearb.h:824
std::function< void(Stream &, synchronize::Notification &)> WaitNotificationFn
Definition: allocator.h:62
void * AllocateBufferWithOptions(IAllocator &allocator, size_t size, bool use_reserve, Stream *stream, WaitNotificationFn wait_fn)
#define ORT_ENFORCE(condition,...)
Definition: common.h:172
constexpr const char * CUDA
Definition: allocator.h:44
constexpr const char * HIP_PINNED
Definition: allocator.h:50
const OrtMemoryInfo & Info() const
Definition: allocator.h:92
constexpr const char * HIP
Definition: allocator.h:49
int initial_growth_chunk_size_bytes
Definition: allocator.h:38
OrtArenaCfg(size_t max_mem, int arena_extend_strategy, int initial_chunk_size_bytes, int max_dead_bytes_per_chunk, int initial_growth_chunk_size_bytes, int64_t max_power_of_two_extend_bytes)
Definition: allocator.h:24
static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t alignment, size_t *out) noexcept
int arena_extend_strategy
Definition: allocator.h:35
static IAllocatorUniquePtr< T > MakeUniquePtr(std::shared_ptr< IAllocator > allocator, size_t count_or_bytes, bool use_reserve=false, Stream *stream=nullptr, WaitNotificationFn wait_fn=nullptr)
Definition: allocator.h:161
std::unique_ptr< T, std::function< void(T *)>> IAllocatorUniquePtr
Definition: allocator.h:66
static bool CalcMemSizeForArray(size_t nmemb, size_t size, size_t *out) noexcept
Definition: allocator.h:97
void Free(void *p) override
virtual void * Reserve(size_t size)
Definition: allocator.h:90
std::map< OrtDevice, AllocatorPtr > AllocatorMap
Definition: allocator.h:262
void AllocatorDefaultFree(void *p)
constexpr const char * CANN
Definition: allocator.h:46
CPUAllocator(const OrtMemoryInfo &memory_info)
Definition: allocator.h:253
void * AllocArray(size_t nmemb, size_t size)
Definition: allocator.h:128
int64_t max_power_of_two_extend_bytes
Definition: allocator.h:39
#define ORT_THROW(...)
Definition: common.h:162
constexpr const char * DML
Definition: allocator.h:48
GLsizeiptr size
Definition: glcorearb.h:664
std::shared_ptr< IAllocator > AllocatorPtr
Definition: allocator.h:261
int initial_chunk_size_bytes
Definition: allocator.h:36
constexpr const char * CANN_PINNED
Definition: allocator.h:47
void * Alloc(size_t size) override
virtual ~IAllocator()=default
void * AllocArrayWithAlignment(size_t nmemb, size_t size)
Definition: allocator.h:141
int max_dead_bytes_per_chunk
Definition: allocator.h:37
IAllocator(const OrtMemoryInfo &info)
Definition: allocator.h:70
type
Definition: core.h:1059
virtual void * Alloc(size_t size)=0
GLint GLsizei count
Definition: glcorearb.h:405
static IAllocatorUniquePtr< T > MakeUniquePtrFromOrtAllocator(OrtAllocator *ort_allocator, size_t count_or_bytes)
Definition: allocator.h:197
constexpr const char * OpenVINO_CPU
Definition: allocator.h:51