HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
reduce.h
Go to the documentation of this file.
1 //
2 // Copyright 2018 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_WORK_REDUCE_H
25 #define PXR_BASE_WORK_REDUCE_H
26 
27 /// \file work/reduce.h
28 #include "pxr/pxr.h"
30 #include "pxr/base/work/api.h"
31 
32 #include <tbb/blocked_range.h>
33 #include <tbb/parallel_reduce.h>
34 #include <tbb/task_group.h>
35 
37 
38 
39 ///////////////////////////////////////////////////////////////////////////////
40 ///
41 /// Recursively splits the range [0, \p n) into subranges, which are then
42 /// reduced by invoking \p loopCallback in parallel. Each invocation of
43 /// \p loopCallback returns a single value that is the result of joining the
44 /// elements in the respective subrange. These values are then further joined
45 /// using the binary operator \p reductionCallback, until only a single value
46 /// remains. This single value is then the result of joining all elements over
47 /// the entire range [0, \p n).
48 ///
49 /// The \p loopCallback must be of the form:
50 ///
51 /// V LoopCallback(size_t begin, size_t end, const V &identity);
52 ///
53 /// The \p reductionCallback must be of the form:
54 ///
55 /// V ReductionCallback(const V &lhs, const V &rhs);
56 ///
57 /// For example, the following code reduces an array of mesh points into a
58 /// single bounding box:
59 ///
60 /// ```{.cpp}
61 ///
62 /// // Get the mesh points from which we are going to generate the bounding box.
63 /// const std::vector<Vector3> &points = GetMeshPoints();
64 ///
65 /// // Generate the bounding box by parallel reducing the points.
66 /// BoundingBox bbox = WorkParallelReduceN(
67 /// BoundingBox(),
68 /// points.size(),
69 /// [&points](size_t b, size_t e, const BoundingBox &identity){
70 /// BoundingBox bbox(identity);
71 ///
72 /// // Insert each point in this subrange into the local bounding box.
73 /// for (size_t i = b; i != e; ++i) {
74 /// bbox.InsertPoint(points[i]);
75 /// }
76 ///
77 /// // Return the local bounding box, which now encapsulates all the
78 /// // points in this subrange.
79 /// return bbox;
80 /// },
81 /// [](const BoundingBox &lhs, const BoundingBox &rhs){
82 /// // Join two bounding boxes into a single bounding box. The
83 /// // algorithm will apply this reduction step recursively until there
84 /// // is only a single bounding box left.
85 /// BoundingBox bbox(lhs);
86 /// bbox.UnionWith(rhs);
87 /// return bbox;
88 /// }
89 /// );
90 ///
91 /// ```
92 ///
93 /// \p grainSize specifies a minimum amount of work to be done per-thread.
94 /// There is overhead to launching a task and a typical guideline is that
95 /// you want to have at least 10,000 instructions to count for the overhead of
96 /// launching that task.
97 ///
98 template <typename Fn, typename Rn, typename V>
99 V
101  const V &identity,
102  size_t n,
103  Fn &&loopCallback,
104  Rn &&reductionCallback,
105  size_t grainSize)
106 {
107  if (n == 0)
108  return identity;
109 
110  // Don't bother with parallel_reduce, if concurrency is limited to 1.
111  if (WorkHasConcurrency()) {
112 
113  class Work_Body_TBB
114  {
115  public:
116  Work_Body_TBB(Fn &fn) : _fn(fn) { }
117 
118  V operator()(
119  const tbb::blocked_range<size_t> &r,
120  const V &value) const {
121  // Note that we std::forward _fn using Fn in order get the
122  // right operator().
123  // We maintain the right type in this way:
124  // If Fn is T&, then reference collapsing gives us T& for _fn
125  // If Fn is T, then std::forward correctly gives us T&& for _fn
126  return std::forward<Fn>(_fn)(r.begin(), r.end(), value);
127  }
128  private:
129  Fn &_fn;
130  };
131 
132  // In most cases we do not want to inherit cancellation state from the
133  // parent context, so we create an isolated task group context.
134  tbb::task_group_context ctx(tbb::task_group_context::isolated);
135  return tbb::parallel_reduce(tbb::blocked_range<size_t>(0,n,grainSize),
136  identity,
137  Work_Body_TBB(loopCallback),
138  std::forward<Rn>(reductionCallback),
139  tbb::auto_partitioner(),
140  ctx);
141  }
142 
143  // If concurrency is limited to 1, execute serially.
144  return std::forward<Fn>(loopCallback)(0, n, identity);
145 }
146 
147 ///////////////////////////////////////////////////////////////////////////////
148 ///
149 /// \overload
150 ///
151 /// This overload does not accept a grain size parameter and instead attempts
152 /// to automatically deduce a grain size that is optimal for the current
153 /// resource utilization and provided workload.
154 ///
155 template <typename Fn, typename Rn, typename V>
156 V
158  const V &identity,
159  size_t n,
160  Fn &&loopCallback,
161  Rn &&reductionCallback)
162 {
163  return WorkParallelReduceN(identity, n, loopCallback, reductionCallback, 1);
164 }
165 
167 
168 #endif // PXR_BASE_WORK_REDUCE_H
GLsizei const GLfloat * value
Definition: glcorearb.h:824
GLdouble n
Definition: glcorearb.h:2008
PXR_NAMESPACE_OPEN_SCOPE V WorkParallelReduceN(const V &identity, size_t n, Fn &&loopCallback, Rn &&reductionCallback, size_t grainSize)
Definition: reduce.h:100
WORK_API bool WorkHasConcurrency()
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1432
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:91
Definition: core.h:1131
GLboolean r
Definition: glcorearb.h:1222