HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
timing.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_ARCH_TIMING_H
25 #define PXR_BASE_ARCH_TIMING_H
26 
27 /// \file arch/timing.h
28 /// \ingroup group_arch_SystemFunctions
29 /// High-resolution, low-cost timing routines.
30 
31 #include "pxr/pxr.h"
32 #include "pxr/base/arch/api.h"
33 #include "pxr/base/arch/defines.h"
34 #include "pxr/base/arch/inttypes.h"
35 
36 /// \addtogroup group_arch_SystemFunctions
37 ///@{
38 
39 #if defined(ARCH_OS_LINUX) && defined(ARCH_CPU_INTEL)
40 #include <x86intrin.h>
41 #elif defined(ARCH_OS_DARWIN)
42 #include <mach/mach_time.h>
43 #elif defined(ARCH_OS_WINDOWS)
44 #include <intrin.h>
45 #endif
46 
47 #include <algorithm>
48 #include <atomic>
49 #include <iterator>
50 #include <numeric>
51 
53 
54 /// Return the current time in system-dependent units.
55 ///
56 /// The current time is returned as a number of "ticks", where each tick
57 /// represents some system-dependent amount of time. The resolution of the
58 /// timing routines varies, but on all systems, it is well under one
59 /// microsecond. The cost of this routine is in the 10s-to-100s of nanoseconds
60 /// on GHz class machines.
61 inline uint64_t
63 {
64 #if defined(ARCH_OS_DARWIN)
65  // On Darwin we'll use mach_absolute_time().
66  return mach_absolute_time();
67 #elif defined(ARCH_CPU_INTEL)
68  // On Intel we'll use the rdtsc instruction.
69  return __rdtsc();
70 #elif defined (ARCH_CPU_ARM)
71  uint64_t result;
72  __asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (result));
73  return result;
74 #else
75 #error Unknown architecture.
76 #endif
77 }
78 
79 
80 /// Get a "start" tick time for measuring an interval of time, followed by a
81 /// later call to ArchGetStopTickTime(). Or see ArchIntervalTimer. This is
82 /// like ArchGetTickTime but it includes compiler & CPU fencing & reordering
83 /// constraints in an attempt to get the best measurement possible.
84 inline uint64_t
86 {
87  uint64_t t;
88 #if defined (ARCH_OS_DARWIN)
89  return ArchGetTickTime();
90 #elif defined (ARCH_CPU_ARM)
91  std::atomic_signal_fence(std::memory_order_seq_cst);
92  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
93  std::atomic_signal_fence(std::memory_order_seq_cst);
94 #elif defined (ARCH_COMPILER_MSVC)
95  _mm_lfence();
96  std::atomic_signal_fence(std::memory_order_seq_cst);
97  t = __rdtsc();
98  _mm_lfence();
99  std::atomic_signal_fence(std::memory_order_seq_cst);
100 #elif defined(ARCH_CPU_INTEL) && \
101  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC))
102  // Prevent reorders by the compiler.
103  std::atomic_signal_fence(std::memory_order_seq_cst);
104  asm volatile(
105  "lfence\n\t"
106  "rdtsc\n\t"
107  "shl $32, %%rdx\n\t"
108  "or %%rdx, %0\n\t"
109  "lfence"
110  : "=a"(t)
111  :
112  // rdtsc writes rdx
113  // shl modifies cc flags
114  : "rdx", "cc");
115 #else
116 #error "Unsupported architecture."
117 #endif
118  return t;
119 }
120 
121 /// Get a "stop" tick time for measuring an interval of time. See
122 /// ArchGetStartTickTime() or ArchIntervalTimer. This is like ArchGetTickTime
123 /// but it includes compiler & CPU fencing & reordering constraints in an
124 /// attempt to get the best measurement possible.
125 inline uint64_t
127 {
128  uint64_t t;
129 #if defined (ARCH_OS_DARWIN)
130  return ArchGetTickTime();
131 #elif defined (ARCH_CPU_ARM)
132  std::atomic_signal_fence(std::memory_order_seq_cst);
133  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
134  std::atomic_signal_fence(std::memory_order_seq_cst);
135 #elif defined (ARCH_COMPILER_MSVC)
136  std::atomic_signal_fence(std::memory_order_seq_cst);
137  unsigned aux;
138  t = __rdtscp(&aux);
139  _mm_lfence();
140  std::atomic_signal_fence(std::memory_order_seq_cst);
141 #elif defined(ARCH_CPU_INTEL) && \
142  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC))
143  std::atomic_signal_fence(std::memory_order_seq_cst);
144  asm volatile(
145  "rdtscp\n\t"
146  "shl $32, %%rdx\n\t"
147  "or %%rdx, %0\n\t"
148  "lfence"
149  : "=a"(t)
150  :
151  // rdtscp writes rcx & rdx
152  // shl modifies cc flags
153  : "rcx", "rdx", "cc");
154 #else
155 #error "Unsupported architecture."
156 #endif
157  return t;
158 }
159 
160 #if defined (doxygen) || \
161  (!defined(ARCH_OS_DARWIN) && defined(ARCH_CPU_INTEL) && \
162  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC)))
163 
164 /// A simple timer class for measuring an interval of time using the
165 /// ArchTickTimer facilities.
166 struct ArchIntervalTimer
167 {
168  /// Construct a timer and start timing if \p start is true.
169  explicit ArchIntervalTimer(bool start=true)
170  : _started(start) {
171  if (_started) {
172  Start();
173  }
174  }
175 
176  /// Start the timer, or reset the start time if it has already been started.
177  void Start() {
178  _started = true;
179  std::atomic_signal_fence(std::memory_order_seq_cst);
180  asm volatile(
181  "lfence\n\t"
182  "rdtsc\n\t"
183  "lfence"
184  : "=a"(_startLow), "=d"(_startHigh) :: );
185  }
186 
187  /// Return true if this timer is started.
188  bool IsStarted() const {
189  return _started;
190  }
191 
192  /// Return this timer's start time, or 0 if it hasn't been started.
193  uint64_t GetStartTicks() const {
194  return (uint64_t(_startHigh) << 32) + _startLow;
195  }
196 
197  /// Read and return the current time.
198  uint64_t GetCurrentTicks() {
199  return ArchGetStopTickTime();
200  }
201 
202  /// Read the current time and return the difference between it and the start
203  /// time. If the timer was not started, return 0.
204  uint64_t GetElapsedTicks() {
205  if (!_started) {
206  return 0;
207  }
208  uint32_t stopLow, stopHigh;
209  std::atomic_signal_fence(std::memory_order_seq_cst);
210  asm volatile(
211  "rdtscp\n\t"
212  "lfence"
213  : "=a"(stopLow), "=d"(stopHigh)
214  :
215  // rdtscp writes rcx
216  : "rcx");
217  return ((uint64_t(stopHigh) << 32) + stopLow) -
218  ((uint64_t(_startHigh) << 32) + _startLow);
219  }
220 private:
221  bool _started = false;
222  uint32_t _startLow = 0, _startHigh = 0;
223 };
224 
225 #else
226 
228 {
229  explicit ArchIntervalTimer(bool start=true)
230  : _started(start) {
231  if (_started) {
232  _startTicks = ArchGetStartTickTime();
233  }
234  }
235 
236  void Start() {
237  _started = true;
238  _startTicks = ArchGetStartTickTime();
239  }
240 
241  bool IsStarted() const {
242  return _started;
243  }
244 
245  uint64_t GetStartTicks() const {
246  return _startTicks;
247  }
248 
249  uint64_t GetCurrentTicks() {
250  return ArchGetStopTickTime();
251  }
252 
253  uint64_t GetElapsedTicks() {
254  if (!_started) {
255  return 0;
256  }
257  return ArchGetStopTickTime() - _startTicks;
258  }
259 private:
260  bool _started = false;
261  uint64_t _startTicks;
262 };
263 
264 #endif
265 
266 /// Return the tick time resolution. Although the number of ticks per second
267 /// may be very large, on many current systems the tick timers do not update at
268 /// that rate. Rather, sequential calls to ArchGetTickTime() may report
269 /// increases of 10s to 100s of ticks, with a minimum increment between calls.
270 /// This function returns that minimum increment as measured at startup time.
271 ///
272 /// Note that if this value is of sufficient size, then short times measured
273 /// with tick timers are potentially subject to significant noise. In
274 /// particular, an interval of measured tick time is liable to be off by +/- one
275 /// ArchGetTickQuantum().
276 ARCH_API
277 uint64_t ArchGetTickQuantum();
278 
279 /// Return the ticks taken to record an interval of time with ArchIntervalTimer,
280 /// as measured at startup time.
281 ARCH_API
283 
284 
285 /// Convert a duration measured in "ticks", as returned by
286 /// \c ArchGetTickTime(), to nanoseconds.
287 ///
288 /// An example to test the timing routines would be:
289 /// \code
290 /// ArchIntervalTimer iTimer;
291 /// sleep(10);
292 ///
293 /// // duration should be approximately 10 * 1e9 = 1e10 nanoseconds.
294 /// int64_t duration = ArchTicksToNanoseconds(iTimer.GetElapsedTicks());
295 /// \endcode
296 ///
297 ARCH_API
298 int64_t ArchTicksToNanoseconds(uint64_t nTicks);
299 
300 /// Convert a duration measured in "ticks", as returned by
301 /// \c ArchGetTickTime(), to seconds.
302 ARCH_API
303 double ArchTicksToSeconds(uint64_t nTicks);
304 
305 /// Convert a duration in seconds to "ticks", as returned by
306 /// \c ArchGetTickTime().
307 ARCH_API
308 uint64_t ArchSecondsToTicks(double seconds);
309 
310 /// Get nanoseconds per tick. Useful when converting ticks obtained from
311 /// \c ArchTickTime()
312 ARCH_API
314 
315 ARCH_API
316 uint64_t
317 Arch_MeasureExecutionTime(uint64_t maxTicks, bool *reachedConsensus,
318  void const *m, uint64_t (*callM)(void const *, int));
319 
320 /// Run \p fn repeatedly attempting to determine a consensus fastest execution
321 /// time with low noise, for up to \p maxTicks, then return the consensus
322 /// fastest execution time. If a consensus is not reached in that time, return
323 /// a best estimate instead. If \p reachedConsensus is not null, set it to
324 /// indicate whether or not a consensus was reached. This function ignores \p
325 /// maxTicks greater than 5 billion ticks and runs for up to 5 billion ticks
326 /// instead. The \p fn will run for an indeterminate number of times, so it
327 /// should be side-effect free. Also, it should do essentially the same work
328 /// on every invocation so that timing its execution makes sense.
329 template <class Fn>
330 uint64_t
332  Fn const &fn,
333  uint64_t maxTicks = 1e7,
334  bool *reachedConsensus = nullptr)
335 {
336  auto measureN = [&fn](int nTimes) -> uint64_t {
337  ArchIntervalTimer iTimer;
338  for (int i = nTimes; i--; ) {
339  std::atomic_signal_fence(std::memory_order_seq_cst);
340  (void)fn();
341  std::atomic_signal_fence(std::memory_order_seq_cst);
342  }
343  return iTimer.GetElapsedTicks();
344  };
345 
346  using MeasureNType = decltype(measureN);
347 
349  maxTicks, reachedConsensus,
350  static_cast<void const *>(&measureN),
351  [](void const *mN, int nTimes) {
352  return (*static_cast<MeasureNType const *>(mN))(nTimes);
353  });
354 }
355 
356 ///@}
357 
359 
360 #endif // PXR_BASE_ARCH_TIMING_H
ARCH_API double ArchTicksToSeconds(uint64_t nTicks)
void
Definition: png.h:1083
GLuint start
Definition: glcorearb.h:475
bool IsStarted() const
Definition: timing.h:241
ARCH_API double ArchGetNanosecondsPerTick()
ARCH_API int64_t ArchTicksToNanoseconds(uint64_t nTicks)
**But if you need a result
Definition: thread.h:613
ARCH_API uint64_t ArchGetIntervalTimerTickOverhead()
uint64_t GetElapsedTicks()
Definition: timing.h:253
uint64_t GetCurrentTicks()
Definition: timing.h:249
ArchIntervalTimer(bool start=true)
Definition: timing.h:229
uint64_t GetStartTicks() const
Definition: timing.h:245
uint64_t ArchMeasureExecutionTime(Fn const &fn, uint64_t maxTicks=1e7, bool *reachedConsensus=nullptr)
Definition: timing.h:331
ARCH_API uint64_t ArchSecondsToTicks(double seconds)
GLdouble t
Definition: glad.h:2397
void Start()
Definition: timing.h:236
PXR_NAMESPACE_OPEN_SCOPE uint64_t ArchGetTickTime()
Definition: timing.h:62
uint64_t ArchGetStartTickTime()
Definition: timing.h:85
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1432
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:91
#define ARCH_API
Definition: api.h:40
uint64_t ArchGetStopTickTime()
Definition: timing.h:126
ARCH_API uint64_t Arch_MeasureExecutionTime(uint64_t maxTicks, bool *reachedConsensus, void const *m, uint64_t(*callM)(void const *, int))
ARCH_API uint64_t ArchGetTickQuantum()