24 #include "core/platform/env.h"
119 class ThreadPoolInterface;
122 namespace onnxruntime {
130 namespace concurrency {
132 template <
typename Environment>
133 class ThreadPoolTempl;
135 class ExtendedThreadPoolInterface;
137 class ThreadPoolParallelSection;
156 const ThreadOptions& thread_options,
158 int degree_of_parallelism,
159 bool low_latency_hint,
160 bool force_hybrid =
false);
229 std::function<
void()> fn) {
250 const std::function<
void(std::ptrdiff_t
first, std::ptrdiff_t
last)>& fn) {
255 const std::function<
void(std::ptrdiff_t first, std::ptrdiff_t last)>& fn);
261 const std::function<
void(std::ptrdiff_t)>& fn) {
263 tp->SimpleParallelFor(total, fn);
265 for (std::ptrdiff_t i = 0; i < total; ++i) {
282 template <
typename F>
285 for (std::ptrdiff_t i = 0; i < total; ++i) {
299 if (num_batches <= 0) {
303 if (num_batches <= 1) {
304 for (
int i = 0; i < total; i++) {
310 tp->SimpleParallelFor(num_batches, [&](std::ptrdiff_t batch_index) {
312 for (std::ptrdiff_t i = work.start; i < work.end; i++) {
326 constexpr
static WorkInfo PartitionWork(std::ptrdiff_t batch_idx, std::ptrdiff_t num_batches, std::ptrdiff_t total_work) {
327 const std::ptrdiff_t work_per_batch = total_work / num_batches;
328 const std::ptrdiff_t work_per_batch_extra = total_work % num_batches;
331 if (batch_idx < work_per_batch_extra) {
332 info.
start = (work_per_batch + 1) * batch_idx;
333 info.
end = info.
start + work_per_batch + 1;
335 info.
start = work_per_batch * batch_idx + work_per_batch_extra;
336 info.
end = info.
start + work_per_batch;
374 int NumThreads()
const;
378 int CurrentThreadId()
const;
385 void RunInParallel(std::function<
void(
unsigned idx)> fn,
unsigned n, std::ptrdiff_t block_size);
393 void ParallelForFixedBlockSizeScheduling(std::ptrdiff_t total, std::ptrdiff_t block_size,
394 const std::function<
void(std::ptrdiff_t, std::ptrdiff_t)>& fn);
399 bool ShouldParallelizeLoop(
const std::ptrdiff_t num_iterations,
400 const std::ptrdiff_t block_size = 1)
const;
404 void ParallelFor(std::ptrdiff_t total,
double cost_per_unit,
405 const std::function<
void(std::ptrdiff_t first, std::ptrdiff_t last)>& fn);
407 void ParallelFor(std::ptrdiff_t total,
const TensorOpCost& cost_per_unit,
408 const std::function<
void(std::ptrdiff_t first, std::ptrdiff_t)>& fn);
410 void SimpleParallelFor(std::ptrdiff_t total,
const std::function<
void(std::ptrdiff_t)>& fn);
412 void Schedule(std::function<
void()> fn);
418 ThreadOptions thread_options_;
427 std::unique_ptr<ThreadPoolTempl<Env> > extended_eigen_threadpool_;
430 bool force_hybrid_ =
false;
GLsizei const GLchar *const * string
static void TrySimpleParallelFor(ThreadPool *tp, std::ptrdiff_t total, const std::function< void(std::ptrdiff_t)> &fn)
static void Schedule(ThreadPool *tp, std::function< void()> fn)
static void TryBatchParallelFor(ThreadPool *tp, std::ptrdiff_t total, F &&fn, std::ptrdiff_t num_batches)
ThreadPool(Env *env, const ThreadOptions &thread_options, const NAME_CHAR_TYPE *name, int degree_of_parallelism, bool low_latency_hint, bool force_hybrid=false)
static bool ShouldParallelize(const ThreadPool *tp)
static void TryParallelFor(ThreadPool *tp, std::ptrdiff_t total, double cost_per_unit, const std::function< void(std::ptrdiff_t first, std::ptrdiff_t last)> &fn)
ParallelSection(ThreadPool *tp)
#define ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TypeName)
GLuint const GLchar * name
static int DegreeOfParallelism(const ThreadPool *tp)
static std::string StopProfiling(concurrency::ThreadPool *tp)
__hostdev__ uint64_t last(uint32_t i) const
static void StartProfiling(concurrency::ThreadPool *tp)
ORT_DISALLOW_COPY_AND_ASSIGNMENT(ThreadPool)
static constexpr WorkInfo PartitionWork(std::ptrdiff_t batch_idx, std::ptrdiff_t num_batches, std::ptrdiff_t total_work)