benchmark  1.9.4
benchmark.h
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Support for registering benchmarks for functions.
16 
17 /* Example usage:
18 // Define a function that executes the code to be measured a
19 // specified number of times:
20 static void BM_StringCreation(benchmark::State& state) {
21  for (auto _ : state)
22  std::string empty_string;
23 }
24 
25 // Register the function as a benchmark
26 BENCHMARK(BM_StringCreation);
27 
28 // Define another benchmark
29 static void BM_StringCopy(benchmark::State& state) {
30  std::string x = "hello";
31  for (auto _ : state)
32  std::string copy(x);
33 }
34 BENCHMARK(BM_StringCopy);
35 
36 // Augment the main() program to invoke benchmarks if specified
37 // via the --benchmark_filter command line flag. E.g.,
38 // my_unittest --benchmark_filter=all
39 // my_unittest --benchmark_filter=BM_StringCreation
40 // my_unittest --benchmark_filter=String
41 // my_unittest --benchmark_filter='Copy|Creation'
42 int main(int argc, char** argv) {
43  benchmark::MaybeReenterWithoutASLR(argc, argv);
44  benchmark::Initialize(&argc, argv);
45  benchmark::RunSpecifiedBenchmarks();
46  benchmark::Shutdown();
47  return 0;
48 }
49 
50 // Sometimes a family of microbenchmarks can be implemented with
51 // just one routine that takes an extra argument to specify which
52 // one of the family of benchmarks to run. For example, the following
53 // code defines a family of microbenchmarks for measuring the speed
54 // of memcpy() calls of different lengths:
55 
56 static void BM_memcpy(benchmark::State& state) {
57  char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
58  memset(src, 'x', state.range(0));
59  for (auto _ : state)
60  memcpy(dst, src, state.range(0));
61  state.SetBytesProcessed(state.iterations() * state.range(0));
62  delete[] src; delete[] dst;
63 }
64 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
65 
66 // The preceding code is quite repetitive, and can be replaced with the
67 // following short-hand. The following invocation will pick a few
68 // appropriate arguments in the specified range and will generate a
69 // microbenchmark for each such argument.
70 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
71 
72 // You might have a microbenchmark that depends on two inputs. For
73 // example, the following code defines a family of microbenchmarks for
74 // measuring the speed of set insertion.
75 static void BM_SetInsert(benchmark::State& state) {
76  set<int> data;
77  for (auto _ : state) {
78  state.PauseTiming();
79  data = ConstructRandomSet(state.range(0));
80  state.ResumeTiming();
81  for (int j = 0; j < state.range(1); ++j)
82  data.insert(RandomNumber());
83  }
84 }
85 BENCHMARK(BM_SetInsert)
86  ->Args({1<<10, 128})
87  ->Args({2<<10, 128})
88  ->Args({4<<10, 128})
89  ->Args({8<<10, 128})
90  ->Args({1<<10, 512})
91  ->Args({2<<10, 512})
92  ->Args({4<<10, 512})
93  ->Args({8<<10, 512});
94 
95 // The preceding code is quite repetitive, and can be replaced with
96 // the following short-hand. The following macro will pick a few
97 // appropriate arguments in the product of the two specified ranges
98 // and will generate a microbenchmark for each such pair.
99 BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
100 
101 // For more complex patterns of inputs, passing a custom function
102 // to Apply allows programmatic specification of an
103 // arbitrary set of arguments to run the microbenchmark on.
104 // The following example enumerates a dense range on
105 // one parameter, and a sparse range on the second.
106 static void CustomArguments(benchmark::internal::Benchmark* b) {
107  for (int i = 0; i <= 10; ++i)
108  for (int j = 32; j <= 1024*1024; j *= 8)
109  b->Args({i, j});
110 }
111 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
112 
113 // Templated microbenchmarks work the same way:
114 // Produce then consume 'size' messages 'iters' times
115 // Measures throughput in the absence of multiprogramming.
116 template <class Q> void BM_Sequential(benchmark::State& state) {
117  Q q;
118  typename Q::value_type v;
119  for (auto _ : state) {
120  for (int i = state.range(0); i--; )
121  q.push(v);
122  for (int e = state.range(0); e--; )
123  q.Wait(&v);
124  }
125  // actually messages, not bytes:
126  state.SetBytesProcessed(state.iterations() * state.range(0));
127 }
128 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
129 
130 Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
131 benchmark. This option overrides the `benchmark_min_time` flag.
132 
133 void BM_test(benchmark::State& state) {
134  ... body ...
135 }
136 BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
137 
138 In a multithreaded test, it is guaranteed that none of the threads will start
139 until all have reached the loop start, and all will have finished before any
140 thread exits the loop body. As such, any global setup or teardown you want to
141 do can be wrapped in a check against the thread index:
142 
143 static void BM_MultiThreaded(benchmark::State& state) {
144  if (state.thread_index() == 0) {
145  // Setup code here.
146  }
147  for (auto _ : state) {
148  // Run the test as normal.
149  }
150  if (state.thread_index() == 0) {
151  // Teardown code here.
152  }
153 }
154 BENCHMARK(BM_MultiThreaded)->Threads(4);
155 
156 
157 If a benchmark runs for only a few milliseconds it may be hard to visually
158 compare the measured times, since the output data is given in nanoseconds by
159 default. To set the time unit manually, specify it as follows:
160 
161 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
162 */
163 
164 #ifndef BENCHMARK_BENCHMARK_H_
165 #define BENCHMARK_BENCHMARK_H_
166 
167 #include <stdint.h>
168 
169 #include <algorithm>
170 #include <atomic>
171 #include <cassert>
172 #include <cstddef>
173 #include <functional>
174 #include <initializer_list>
175 #include <iosfwd>
176 #include <limits>
177 #include <map>
178 #include <memory>
179 #include <set>
180 #include <string>
181 #include <type_traits>
182 #include <utility>
183 #include <vector>
184 
185 #include "benchmark/export.h"
186 
187 #if defined(_MSC_VER)
188 #include <intrin.h> // for _ReadWriteBarrier
189 #endif
190 
191 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
192  TypeName(const TypeName&) = delete; \
193  TypeName& operator=(const TypeName&) = delete
194 
195 #ifdef BENCHMARK_HAS_CXX17
196 #define BENCHMARK_UNUSED [[maybe_unused]]
197 #elif defined(__GNUC__) || defined(__clang__)
198 #define BENCHMARK_UNUSED __attribute__((unused))
199 #else
200 #define BENCHMARK_UNUSED
201 #endif
202 
203 // Used to annotate functions, methods and classes so they
204 // are not optimized by the compiler. Useful for tests
205 // where you expect loops to stay in place churning cycles
206 #if defined(__clang__)
207 #define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
208 #elif defined(__GNUC__) || defined(__GNUG__)
209 #define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
210 #else
211 // MSVC & Intel do not have a no-optimize attribute, only line pragmas
212 #define BENCHMARK_DONT_OPTIMIZE
213 #endif
214 
215 #if defined(__GNUC__) || defined(__clang__)
216 #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
217 #elif defined(_MSC_VER) && !defined(__clang__)
218 #define BENCHMARK_ALWAYS_INLINE __forceinline
219 #define __func__ __FUNCTION__
220 #else
221 #define BENCHMARK_ALWAYS_INLINE
222 #endif
223 
224 #define BENCHMARK_INTERNAL_TOSTRING2(x) #x
225 #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
226 
227 // clang-format off
228 #if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__)
229 #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
230 #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
231 #define BENCHMARK_DISABLE_DEPRECATED_WARNING \
232  _Pragma("GCC diagnostic push") \
233  _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
234 #define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
235 #elif defined(__NVCOMPILER)
236 #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
237 #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
238 #define BENCHMARK_DISABLE_DEPRECATED_WARNING \
239  _Pragma("diagnostic push") \
240  _Pragma("diag_suppress deprecated_entity_with_custom_message")
241 #define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop")
242 #else
243 #define BENCHMARK_BUILTIN_EXPECT(x, y) x
244 #define BENCHMARK_DEPRECATED_MSG(msg)
245 #define BENCHMARK_WARNING_MSG(msg) \
246  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
247  __LINE__) ") : warning note: " msg))
248 #define BENCHMARK_DISABLE_DEPRECATED_WARNING
249 #define BENCHMARK_RESTORE_DEPRECATED_WARNING
250 #endif
251 // clang-format on
252 
253 #if defined(__GNUC__) && !defined(__clang__)
254 #define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
255 #endif
256 
257 #ifndef __has_builtin
258 #define __has_builtin(x) 0
259 #endif
260 
261 #if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
262 #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
263 #elif defined(_MSC_VER)
264 #define BENCHMARK_UNREACHABLE() __assume(false)
265 #else
266 #define BENCHMARK_UNREACHABLE() ((void)0)
267 #endif
268 
269 #if defined(__GNUC__)
270 // Determine the cacheline size based on architecture
271 #if defined(__i386__) || defined(__x86_64__)
272 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
273 #elif defined(__powerpc64__)
274 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 128
275 #elif defined(__aarch64__)
276 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
277 #elif defined(__arm__)
278 // Cache line sizes for ARM: These values are not strictly correct since
279 // cache line sizes depend on implementations, not architectures. There
280 // are even implementations with cache line sizes configurable at boot
281 // time.
282 #if defined(__ARM_ARCH_5T__)
283 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 32
284 #elif defined(__ARM_ARCH_7A__)
285 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
286 #endif // ARM_ARCH
287 #endif // arches
288 #endif // __GNUC__
289 
290 #ifndef BENCHMARK_INTERNAL_CACHELINE_SIZE
291 // A reasonable default guess. Note that overestimates tend to waste more
292 // space, while underestimates tend to waste more time.
293 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
294 #endif
295 
296 #if defined(__GNUC__)
297 // Indicates that the declared object should be cache-aligned using
298 // `BENCHMARK_INTERNAL_CACHELINE_SIZE` (see above).
299 #define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
300  __attribute__((aligned(BENCHMARK_INTERNAL_CACHELINE_SIZE)))
301 #elif defined(_MSC_VER)
302 #define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
303  __declspec(align(BENCHMARK_INTERNAL_CACHELINE_SIZE))
304 #else
305 #define BENCHMARK_INTERNAL_CACHELINE_ALIGNED
306 #endif
307 
308 #if defined(_MSC_VER)
309 #pragma warning(push)
310 // C4251: <symbol> needs to have dll-interface to be used by clients of class
311 #pragma warning(disable : 4251)
312 #endif // _MSC_VER_
313 
314 namespace benchmark {
315 
316 namespace internal {
317 #if (__cplusplus < 201402L || (defined(_MSC_VER) && _MSVC_LANG < 201402L))
318 template <typename T, typename... Args>
319 std::unique_ptr<T> make_unique(Args&&... args) {
320  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
321 }
322 #else
323 using ::std::make_unique;
324 #endif
325 } // namespace internal
326 
327 class BenchmarkReporter;
328 class State;
329 
330 using IterationCount = int64_t;
331 
332 // Define alias of Setup/Teardown callback function type
333 using callback_function = std::function<void(const benchmark::State&)>;
334 
335 // Default minimum benchmark running time, in seconds, given as a string.
336 const char kDefaultMinTimeStr[] = "0.5s";
337 
338 BENCHMARK_EXPORT void MaybeReenterWithoutASLR(int, char**);
339 
340 // Returns the version of the library.
341 BENCHMARK_EXPORT std::string GetBenchmarkVersion();
342 
343 BENCHMARK_EXPORT void PrintDefaultHelp();
344 
345 BENCHMARK_EXPORT void Initialize(int* argc, char** argv,
346  void (*HelperPrintf)() = PrintDefaultHelp);
347 BENCHMARK_EXPORT void Shutdown();
348 
349 // Report to stdout all arguments in 'argv' as unrecognized except the first.
350 // Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
351 BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv);
352 
353 // Returns the current value of --benchmark_filter.
354 BENCHMARK_EXPORT std::string GetBenchmarkFilter();
355 
356 // Sets a new value to --benchmark_filter. (This will override this flag's
357 // current value).
358 // Should be called after `benchmark::Initialize()`, as
359 // `benchmark::Initialize()` will override the flag's value.
360 BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value);
361 
362 // Returns the current value of --v (command line value for verbosity).
363 BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity();
364 
365 // Creates a default display reporter. Used by the library when no display
366 // reporter is provided, but also made available for external use in case a
367 // custom reporter should respect the `--benchmark_format` flag as a fallback
368 BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter();
369 
370 // Generate a list of benchmarks matching the specified --benchmark_filter flag
371 // and if --benchmark_list_tests is specified return after printing the name
372 // of each matching benchmark. Otherwise run each matching benchmark and
373 // report the results.
374 //
375 // spec : Specify the benchmarks to run. If users do not specify this arg,
376 // then the value of FLAGS_benchmark_filter
377 // will be used.
378 //
379 // The second and third overloads use the specified 'display_reporter' and
380 // 'file_reporter' respectively. 'file_reporter' will write to the file
381 // specified
382 // by '--benchmark_out'. If '--benchmark_out' is not given the
383 // 'file_reporter' is ignored.
384 //
385 // RETURNS: The number of matching benchmarks.
386 BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks();
387 BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec);
388 
389 BENCHMARK_EXPORT size_t
390 RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
391 BENCHMARK_EXPORT size_t
392 RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec);
393 
394 BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(
395  BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter);
396 BENCHMARK_EXPORT size_t
397 RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
398  BenchmarkReporter* file_reporter, std::string spec);
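//
// For example, a minimal sketch of running with an explicitly chosen display
// reporter (ConsoleReporter is the library's console reporter; any
// BenchmarkReporter-derived class can be used the same way):
//
//   benchmark::Initialize(&argc, argv);
//   benchmark::ConsoleReporter display_reporter;
//   benchmark::RunSpecifiedBenchmarks(&display_reporter);
//   benchmark::Shutdown();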
399 
400 // TimeUnit is passed to a benchmark in order to specify the order of magnitude
401 // for the measured time.
402 enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };
403 
404 BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit();
405 
406 // Sets the default time unit the benchmarks use
407 // Has to be called before the benchmark loop to take effect
408 BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit);
409 
410 // If a MemoryManager is registered (via RegisterMemoryManager()),
411 // it can be used to collect and report allocation metrics for a run of the
412 // benchmark.
413 class MemoryManager {
414  public:
415  static constexpr int64_t TombstoneValue = std::numeric_limits<int64_t>::max();
416 
417  struct Result {
418  Result()
419  : num_allocs(0),
420  max_bytes_used(0),
421  total_allocated_bytes(TombstoneValue),
422  net_heap_growth(TombstoneValue),
423  memory_iterations(0) {}
424 
425  // The number of allocations made in total between Start and Stop.
426  int64_t num_allocs;
427 
428  // The peak memory use between Start and Stop.
429  int64_t max_bytes_used;
430 
431  // The total memory allocated, in bytes, between Start and Stop.
432  // Init'ed to TombstoneValue if metric not available.
433  int64_t total_allocated_bytes;
434 
435  // The net changes in memory, in bytes, between Start and Stop.
436  // ie., total_allocated_bytes - total_deallocated_bytes.
437  // Init'ed to TombstoneValue if metric not available.
438  int64_t net_heap_growth;
439 
440  IterationCount memory_iterations;
441  };
442 
443  virtual ~MemoryManager() {}
444 
445  // Implement this to start recording allocation information.
446  virtual void Start() = 0;
447 
448  // Implement this to stop recording and fill out the given Result structure.
449  virtual void Stop(Result& result) = 0;
450 };
451 
452 // Register a MemoryManager instance that will be used to collect and report
453 // allocation measurements for benchmark runs.
454 BENCHMARK_EXPORT
455 void RegisterMemoryManager(MemoryManager* memory_manager);
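//
// A minimal sketch of a custom MemoryManager; the class name and the way the
// numbers are obtained are illustrative only:
//
//   class MyMemoryManager : public benchmark::MemoryManager {
//    public:
//     void Start() override { /* reset allocation hooks/counters here */ }
//     void Stop(Result& result) override {
//       result.num_allocs = 0;      // fill in from your allocator hooks
//       result.max_bytes_used = 0;
//     }
//   };
//
//   // Typically registered in main(), before RunSpecifiedBenchmarks():
//   //   static MyMemoryManager mm;
//   //   benchmark::RegisterMemoryManager(&mm);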
456 
457 // If a ProfilerManager is registered (via RegisterProfilerManager()), the
458 // benchmark will be run an additional time under the profiler to collect and
459 // report profile metrics for the run of the benchmark.
460 class ProfilerManager {
461  public:
462  virtual ~ProfilerManager() {}
463 
464  // This is called after `Setup()` code and right before the benchmark is run.
465  virtual void AfterSetupStart() = 0;
466 
467  // This is called before `Teardown()` code and right after the benchmark
468  // completes.
469  virtual void BeforeTeardownStop() = 0;
470 };
471 
472 // Register a ProfilerManager instance that will be used to collect and report
473 // profile measurements for benchmark runs.
474 BENCHMARK_EXPORT
475 void RegisterProfilerManager(ProfilerManager* profiler_manager);
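//
// A minimal sketch of a custom ProfilerManager; the bodies are placeholders
// for whatever profiling API is being wrapped:
//
//   class MyProfilerManager : public benchmark::ProfilerManager {
//    public:
//     void AfterSetupStart() override { /* start collecting profile data */ }
//     void BeforeTeardownStop() override { /* stop and flush profile data */ }
//   };
//
//   // static MyProfilerManager pm;
//   // benchmark::RegisterProfilerManager(&pm);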
476 
477 // Add a key-value pair to output as part of the context stanza in the report.
478 BENCHMARK_EXPORT
479 void AddCustomContext(const std::string& key, const std::string& value);
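//
// For example (the key/value pairs are illustrative), typically called from
// main() before RunSpecifiedBenchmarks():
//
//   benchmark::AddCustomContext("compiler", "clang-18");
//   benchmark::AddCustomContext("build_type", "Release");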
480 
481 namespace internal {
482 class Benchmark;
483 class BenchmarkImp;
484 class BenchmarkFamilies;
485 
486 BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext();
487 
488 BENCHMARK_EXPORT
489 void UseCharPointer(char const volatile*);
490 
491 // Take ownership of the pointer and register the benchmark. Return the
492 // registered benchmark.
493 BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(
494  std::unique_ptr<Benchmark>);
495 
496 // Ensure that the standard streams are properly initialized in every TU.
497 BENCHMARK_EXPORT int InitializeStreams();
498 BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
499 
500 } // namespace internal
501 
502 #if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
503  defined(__EMSCRIPTEN__)
504 #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
505 #endif
506 
507 // Force the compiler to flush pending writes to global memory. Acts as an
508 // effective read/write barrier
509 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
510  std::atomic_signal_fence(std::memory_order_acq_rel);
511 }
512 
513 // The DoNotOptimize(...) function can be used to prevent a value or
514 // expression from being optimized away by the compiler. This function is
515 // intended to add little to no overhead.
516 // See: https://youtu.be/nXaxk27zwlk?t=2441
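//
// Typical usage, as a sketch (the benchmark name and the summing workload are
// illustrative):
//
//   static void BM_Sum(benchmark::State& state) {
//     std::vector<int> v(1024, 1);
//     for (auto _ : state) {
//       int sum = 0;
//       for (int x : v) sum += x;
//       benchmark::DoNotOptimize(sum);  // treat 'sum' as used
//       benchmark::ClobberMemory();     // flush pending writes to memory
//     }
//   }
//   BENCHMARK(BM_Sum);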
517 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
518 #if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
519 template <class Tp>
520 BENCHMARK_DEPRECATED_MSG(
521  "The const-ref version of this method can permit "
522  "undesired compiler optimizations in benchmarks")
523 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
524  asm volatile("" : : "r,m"(value) : "memory");
525 }
526 
527 template <class Tp>
528 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
529 #if defined(__clang__)
530  asm volatile("" : "+r,m"(value) : : "memory");
531 #else
532  asm volatile("" : "+m,r"(value) : : "memory");
533 #endif
534 }
535 
536 template <class Tp>
537 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
538 #if defined(__clang__)
539  asm volatile("" : "+r,m"(value) : : "memory");
540 #else
541  asm volatile("" : "+m,r"(value) : : "memory");
542 #endif
543 }
544 // !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
545 #elif (__GNUC__ >= 5)
546 // Workaround for a bug with full argument copy overhead with GCC.
547 // See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519
548 template <class Tp>
549 BENCHMARK_DEPRECATED_MSG(
550  "The const-ref version of this method can permit "
551  "undesired compiler optimizations in benchmarks")
552 inline BENCHMARK_ALWAYS_INLINE
553  typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
554  (sizeof(Tp) <= sizeof(Tp*))>::type
555  DoNotOptimize(Tp const& value) {
556  asm volatile("" : : "r,m"(value) : "memory");
557 }
558 
559 template <class Tp>
560 BENCHMARK_DEPRECATED_MSG(
561  "The const-ref version of this method can permit "
562  "undesired compiler optimizations in benchmarks")
563 inline BENCHMARK_ALWAYS_INLINE
564  typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
565  (sizeof(Tp) > sizeof(Tp*))>::type
566  DoNotOptimize(Tp const& value) {
567  asm volatile("" : : "m"(value) : "memory");
568 }
569 
570 template <class Tp>
571 inline BENCHMARK_ALWAYS_INLINE
572  typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
573  (sizeof(Tp) <= sizeof(Tp*))>::type
574  DoNotOptimize(Tp& value) {
575  asm volatile("" : "+m,r"(value) : : "memory");
576 }
577 
578 template <class Tp>
579 inline BENCHMARK_ALWAYS_INLINE
580  typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
581  (sizeof(Tp) > sizeof(Tp*))>::type
582  DoNotOptimize(Tp& value) {
583  asm volatile("" : "+m"(value) : : "memory");
584 }
585 
586 template <class Tp>
587 inline BENCHMARK_ALWAYS_INLINE
588  typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
589  (sizeof(Tp) <= sizeof(Tp*))>::type
590  DoNotOptimize(Tp&& value) {
591  asm volatile("" : "+m,r"(value) : : "memory");
592 }
593 
594 template <class Tp>
595 inline BENCHMARK_ALWAYS_INLINE
596  typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
597  (sizeof(Tp) > sizeof(Tp*))>::type
598  DoNotOptimize(Tp&& value) {
599  asm volatile("" : "+m"(value) : : "memory");
600 }
601 // !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
602 #endif
603 
604 #elif defined(_MSC_VER)
605 template <class Tp>
606 BENCHMARK_DEPRECATED_MSG(
607  "The const-ref version of this method can permit "
608  "undesired compiler optimizations in benchmarks")
609 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
610  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
611  _ReadWriteBarrier();
612 }
613 
614 #else
615 template <class Tp>
616 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
617  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
618 }
619 // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
620 #endif
621 
622 // This class is used for user-defined counters.
623 class Counter {
624  public:
625  enum Flags {
626  kDefaults = 0,
627  // Mark the counter as a rate. It will be presented divided
628  // by the duration of the benchmark.
629  kIsRate = 1 << 0,
630  // Mark the counter as a thread-average quantity. It will be
631  // presented divided by the number of threads.
632  kAvgThreads = 1 << 1,
633  // Mark the counter as a thread-average rate. See above.
634  kAvgThreadsRate = kIsRate | kAvgThreads,
635  // Mark the counter as a constant value, valid/same for *every* iteration.
636  // When reporting, it will be *multiplied* by the iteration count.
637  kIsIterationInvariant = 1 << 2,
638  // Mark the counter as a constant rate.
639  // When reporting, it will be *multiplied* by the iteration count
640  // and then divided by the duration of the benchmark.
641  kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
642  // Mark the counter as an iteration-average quantity.
643  // It will be presented divided by the number of iterations.
644  kAvgIterations = 1 << 3,
645  // Mark the counter as an iteration-average rate. See above.
646  kAvgIterationsRate = kIsRate | kAvgIterations,
647 
648  // In the end, invert the result. This is always done last!
649  kInvert = 1 << 31
650  };
651 
652  enum OneK {
653  // 1'000 items per 1k
654  kIs1000 = 1000,
655  // 1'024 items per 1k
656  kIs1024 = 1024
657  };
658 
659  double value;
660  Flags flags;
661  OneK oneK;
662 
663  BENCHMARK_ALWAYS_INLINE
664  Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
665  : value(v), flags(f), oneK(k) {}
666 
667  BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
668  BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
669 };
670 
671 // A helper for user code to create unforeseen combinations of Flags, without
672 // having to do this cast manually each time, or providing this operator.
673 Counter::Flags inline operator|(const Counter::Flags& LHS,
674  const Counter::Flags& RHS) {
675  return static_cast<Counter::Flags>(static_cast<int>(LHS) |
676  static_cast<int>(RHS));
677 }
678 
679 // This is the container for the user-defined counters.
680 typedef std::map<std::string, Counter> UserCounters;
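//
// Example sketch of reporting a user-defined rate counter; the benchmark name,
// the "FooRate" counter name and DoParse() are hypothetical:
//
//   static void BM_Parse(benchmark::State& state) {
//     int64_t items = 0;
//     for (auto _ : state) {
//       items += DoParse();  // hypothetical unit of work
//     }
//     state.counters["FooRate"] = benchmark::Counter(
//         static_cast<double>(items), benchmark::Counter::kIsRate);
//   }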
681 
682 // BigO is passed to a benchmark in order to specify the asymptotic
683 // computational complexity for the benchmark.
684 // In case oAuto is selected, the complexity will be calculated automatically
685 // to the best fit.
686 enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
687 
688 typedef int64_t ComplexityN;
689 
690 enum StatisticUnit { kTime, kPercentage };
691 
692 // BigOFunc is passed to a benchmark in order to specify the asymptotic
693 // computational complexity for the benchmark.
694 typedef double(BigOFunc)(ComplexityN);
695 
696 // StatisticsFunc is passed to a benchmark in order to compute some descriptive
697 // statistics over all the measurements of some type
698 typedef double(StatisticsFunc)(const std::vector<double>&);
699 
700 namespace internal {
701 struct Statistics {
702  std::string name_;
703  StatisticsFunc* compute_;
704  StatisticUnit unit_;
705 
706  Statistics(const std::string& name, StatisticsFunc* compute,
707  StatisticUnit unit = kTime)
708  : name_(name), compute_(compute), unit_(unit) {}
709 };
710 
711 class BenchmarkInstance;
712 class ThreadTimer;
713 class ThreadManager;
714 class PerfCountersMeasurement;
715 
716 enum AggregationReportMode : unsigned {
717  // The mode has not been manually specified
718  ARM_Unspecified = 0,
719  // The mode is user-specified.
720  // This may or may not be set when the following bit-flags are set.
721  ARM_Default = 1U << 0U,
722  // File reporter should only output aggregates.
723  ARM_FileReportAggregatesOnly = 1U << 1U,
724  // Display reporter should only output aggregates
725  ARM_DisplayReportAggregatesOnly = 1U << 2U,
726  // Both reporters should only display aggregates.
727  ARM_ReportAggregatesOnly =
728  ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
729 };
730 
731 enum Skipped : unsigned {
732  NotSkipped = 0,
733  SkippedWithMessage,
734  SkippedWithError
735 };
736 
737 } // namespace internal
738 
739 #if defined(_MSC_VER)
740 #pragma warning(push)
741 // C4324: 'benchmark::State': structure was padded due to alignment specifier
742 #pragma warning(disable : 4324)
743 #endif // _MSC_VER_
744 // State is passed to a running Benchmark and contains state for the
745 // benchmark to use.
746 class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State {
747  public:
748  struct StateIterator;
749  friend struct StateIterator;
750 
751  // Returns iterators used to run each iteration of a benchmark using a
752  // C++11 range-based for loop. These functions should not be called directly.
753  //
754  // REQUIRES: The benchmark has not started running yet. Neither begin nor end
755  // have been called previously.
756  //
757  // NOTE: KeepRunning may not be used after calling either of these functions.
758  inline BENCHMARK_ALWAYS_INLINE StateIterator begin();
759  inline BENCHMARK_ALWAYS_INLINE StateIterator end();
760 
761  // Returns true if the benchmark should continue through another iteration.
762  // NOTE: A benchmark may not return from the test until KeepRunning() has
763  // returned false.
764  inline bool KeepRunning();
765 
766  // Returns true iff the benchmark should run n more iterations.
767  // REQUIRES: 'n' > 0.
768  // NOTE: A benchmark must not return from the test until KeepRunningBatch()
769  // has returned false.
770  // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
771  //
772  // Intended usage:
773  // while (state.KeepRunningBatch(1000)) {
774  // // process 1000 elements
775  // }
776  inline bool KeepRunningBatch(IterationCount n);
777 
778  // REQUIRES: timer is running and 'SkipWithMessage(...)' or
779  // 'SkipWithError(...)' has not been called by the current thread.
780  // Stop the benchmark timer. If not called, the timer will be
781  // automatically stopped after the last iteration of the benchmark loop.
782  //
783  // For threaded benchmarks the PauseTiming() function only pauses the timing
784  // for the current thread.
785  //
786  // NOTE: The "real time" measurement is per-thread. If different threads
787  // report different measurements the largest one is reported.
788  //
789  // NOTE: PauseTiming()/ResumeTiming() are relatively
790  // heavyweight, and so their use should generally be avoided
791  // within each benchmark iteration, if possible.
792  void PauseTiming();
793 
794  // REQUIRES: timer is not running and 'SkipWithMessage(...)' or
795  // 'SkipWithError(...)' has not been called by the current thread.
796  // Start the benchmark timer. The timer is NOT running on entrance to the
797  // benchmark function. It begins running after control flow enters the
798  // benchmark loop.
799  //
800  // NOTE: PauseTiming()/ResumeTiming() are relatively
801  // heavyweight, and so their use should generally be avoided
802  // within each benchmark iteration, if possible.
803  void ResumeTiming();
804 
805  // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
806  // called previously by the current thread.
807  // Report the benchmark as resulting in being skipped with the specified
808  // 'msg'.
809  // After this call the user may explicitly 'return' from the benchmark.
810  //
811  // If the ranged-for style of benchmark loop is used, the user must explicitly
812  // break from the loop, otherwise all future iterations will be run.
813  // If the 'KeepRunning()' loop is used the current thread will automatically
814  // exit the loop at the end of the current iteration.
815  //
816  // For threaded benchmarks only the current thread stops executing and future
817  // calls to `KeepRunning()` will block until all threads have completed
818  // the `KeepRunning()` loop. If multiple threads report being skipped only the
819  // first skip message is used.
820  //
821  // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit
822  // the current scope immediately. If the function is called from within
823  // the 'KeepRunning()' loop the current iteration will finish. It is the user's
824  // responsibility to exit the scope as needed.
825  void SkipWithMessage(const std::string& msg);
826 
827  // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
828  // called previously by the current thread.
829  // Report the benchmark as resulting in an error with the specified 'msg'.
830  // After this call the user may explicitly 'return' from the benchmark.
831  //
832  // If the ranged-for style of benchmark loop is used, the user must explicitly
833  // break from the loop, otherwise all future iterations will be run.
834  // If the 'KeepRunning()' loop is used the current thread will automatically
835  // exit the loop at the end of the current iteration.
836  //
837  // For threaded benchmarks only the current thread stops executing and future
838  // calls to `KeepRunning()` will block until all threads have completed
839  // the `KeepRunning()` loop. If multiple threads report an error only the
840  // first error message is used.
841  //
842  // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
843  // the current scope immediately. If the function is called from within
844  // the 'KeepRunning()' loop the current iteration will finish. It is the user's
845  // responsibility to exit the scope as needed.
846  void SkipWithError(const std::string& msg);
847 
848  // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called.
849  bool skipped() const { return internal::NotSkipped != skipped_; }
850 
851  // Returns true if an error has been reported with 'SkipWithError(...)'.
852  bool error_occurred() const { return internal::SkippedWithError == skipped_; }
853 
854  // REQUIRES: called exactly once per iteration of the benchmarking loop.
855  // Set the manually measured time for this benchmark iteration, which
856  // is used instead of automatically measured time if UseManualTime() was
857  // specified.
858  //
859  // For threaded benchmarks the final value will be set to the largest
860  // reported value.
861  void SetIterationTime(double seconds);
862 
863  // Set the number of bytes processed by the current benchmark
864  // execution. This routine is typically called once at the end of a
865  // throughput oriented benchmark.
866  //
867  // REQUIRES: a benchmark has exited its benchmarking loop.
868  BENCHMARK_ALWAYS_INLINE
869  void SetBytesProcessed(int64_t bytes) {
870  counters["bytes_per_second"] =
871  Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
872  }
873 
874  BENCHMARK_ALWAYS_INLINE
875  int64_t bytes_processed() const {
876  if (counters.find("bytes_per_second") != counters.end())
877  return static_cast<int64_t>(counters.at("bytes_per_second"));
878  return 0;
879  }
880 
881  // If this routine is called with complexity_n > 0 and a complexity
882  // report is requested for the benchmark family, then the current
883  // benchmark will be part of the computation and complexity_n will
884  // represent the length of N, i.e. the input size that the complexity
885  // is fitted against.
886  BENCHMARK_ALWAYS_INLINE
887  void SetComplexityN(ComplexityN complexity_n) {
888  complexity_n_ = complexity_n;
889  }
890 
891  BENCHMARK_ALWAYS_INLINE
892  ComplexityN complexity_length_n() const { return complexity_n_; }
893 
894  // If this routine is called with items > 0, then an items/s
895  // label is printed on the benchmark report line for the currently
896  // executing benchmark. It is typically called at the end of a processing
897  // benchmark where a processing items/second output is desired.
898  //
899  // REQUIRES: a benchmark has exited its benchmarking loop.
900  BENCHMARK_ALWAYS_INLINE
901  void SetItemsProcessed(int64_t items) {
902  counters["items_per_second"] =
903  Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
904  }
905 
906  BENCHMARK_ALWAYS_INLINE
907  int64_t items_processed() const {
908  if (counters.find("items_per_second") != counters.end())
909  return static_cast<int64_t>(counters.at("items_per_second"));
910  return 0;
911  }
912 
913  // If this routine is called, the specified label is printed at the
914  // end of the benchmark report line for the currently executing
915  // benchmark. Example:
916  // static void BM_Compress(benchmark::State& state) {
917  // ...
918  // double compress = input_size / output_size;
919  // state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compress));
920  // }
921  // Produces output that looks like:
922  // BM_Compress 50 50 14115038 compress:27.3%
923  //
924  // REQUIRES: a benchmark has exited its benchmarking loop.
925  void SetLabel(const std::string& label);
926 
927  // Range arguments for this run. CHECKs if the argument has been set.
928  BENCHMARK_ALWAYS_INLINE
929  int64_t range(std::size_t pos = 0) const {
930  assert(range_.size() > pos);
931  return range_[pos];
932  }
933 
934  BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
935  int64_t range_x() const { return range(0); }
936 
937  BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
938  int64_t range_y() const { return range(1); }
939 
940  // Number of threads concurrently executing the benchmark.
941  BENCHMARK_ALWAYS_INLINE
942  int threads() const { return threads_; }
943 
944  // Index of the executing thread. Values from [0, threads).
945  BENCHMARK_ALWAYS_INLINE
946  int thread_index() const { return thread_index_; }
947 
948  BENCHMARK_ALWAYS_INLINE
949  IterationCount iterations() const {
950  if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
951  return 0;
952  }
953  return max_iterations - total_iterations_ + batch_leftover_;
954  }
955 
956  BENCHMARK_ALWAYS_INLINE
957  std::string name() const { return name_; }
958 
959  private:
960  // items we expect on the first cache line (ie 64 bytes of the struct)
961  // When total_iterations_ is 0, KeepRunning() and friends will return false.
962  // May be larger than max_iterations.
963  IterationCount total_iterations_;
964 
965  // When using KeepRunningBatch(), batch_leftover_ holds the number of
966  // iterations beyond max_iters that were run. Used to track
967  // completed_iterations_ accurately.
968  IterationCount batch_leftover_;
969 
970  public:
971  const IterationCount max_iterations;
972 
973  private:
974  bool started_;
975  bool finished_;
976  internal::Skipped skipped_;
977 
978  // items we don't need on the first cache line
979  std::vector<int64_t> range_;
980 
981  ComplexityN complexity_n_;
982 
983  public:
984  // Container for user-defined counters.
985  UserCounters counters;
986 
987  private:
988  State(std::string name, IterationCount max_iters,
989  const std::vector<int64_t>& ranges, int thread_i, int n_threads,
990  internal::ThreadTimer* timer, internal::ThreadManager* manager,
991  internal::PerfCountersMeasurement* perf_counters_measurement,
992  ProfilerManager* profiler_manager);
993 
994  void StartKeepRunning();
995  // Implementation of KeepRunning() and KeepRunningBatch().
996  // is_batch must be true unless n is 1.
997  inline bool KeepRunningInternal(IterationCount n, bool is_batch);
998  void FinishKeepRunning();
999 
1000  const std::string name_;
1001  const int thread_index_;
1002  const int threads_;
1003 
1004  internal::ThreadTimer* const timer_;
1005  internal::ThreadManager* const manager_;
1006  internal::PerfCountersMeasurement* const perf_counters_measurement_;
1007  ProfilerManager* const profiler_manager_;
1008 
1009  friend class internal::BenchmarkInstance;
1010 };
1011 #if defined(_MSC_VER)
1012 #pragma warning(pop)
1013 #endif // _MSC_VER_
1014 
1015 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
1016  return KeepRunningInternal(1, /*is_batch=*/false);
1017 }
1018 
1019 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
1020  return KeepRunningInternal(n, /*is_batch=*/true);
1021 }
1022 
1023 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
1024  bool is_batch) {
1025  // total_iterations_ is set to 0 by the constructor, and always set to a
1026  // nonzero value by StartKeepRunning().
1027  assert(n > 0);
1028  // n must be 1 unless is_batch is true.
1029  assert(is_batch || n == 1);
1030  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
1031  total_iterations_ -= n;
1032  return true;
1033  }
1034  if (!started_) {
1035  StartKeepRunning();
1036  if (!skipped() && total_iterations_ >= n) {
1037  total_iterations_ -= n;
1038  return true;
1039  }
1040  }
1041  // For non-batch runs, total_iterations_ must be 0 by now.
1042  if (is_batch && total_iterations_ != 0) {
1043  batch_leftover_ = n - total_iterations_;
1044  total_iterations_ = 0;
1045  return true;
1046  }
1047  FinishKeepRunning();
1048  return false;
1049 }
1050 
1051 struct State::StateIterator {
1052  struct BENCHMARK_UNUSED Value {};
1053  typedef std::forward_iterator_tag iterator_category;
1054  typedef Value value_type;
1055  typedef Value reference;
1056  typedef Value pointer;
1057  typedef std::ptrdiff_t difference_type;
1058 
1059  private:
1060  friend class State;
1061  BENCHMARK_ALWAYS_INLINE
1062  StateIterator() : cached_(0), parent_() {}
1063 
1064  BENCHMARK_ALWAYS_INLINE
1065  explicit StateIterator(State* st)
1066  : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {}
1067 
1068  public:
1069  BENCHMARK_ALWAYS_INLINE
1070  Value operator*() const { return Value(); }
1071 
1072  BENCHMARK_ALWAYS_INLINE
1073  StateIterator& operator++() {
1074  assert(cached_ > 0);
1075  --cached_;
1076  return *this;
1077  }
1078 
1079  BENCHMARK_ALWAYS_INLINE
1080  bool operator!=(StateIterator const&) const {
1081  if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
1082  parent_->FinishKeepRunning();
1083  return false;
1084  }
1085 
1086  private:
1087  IterationCount cached_;
1088  State* const parent_;
1089 };
1090 
1091 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
1092  return StateIterator(this);
1093 }
1094 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
1095  StartKeepRunning();
1096  return StateIterator();
1097 }
1098 
1099 // Base class for user-defined multi-threading
1100 struct ThreadRunnerBase {
1101  virtual ~ThreadRunnerBase() {}
1102  virtual void RunThreads(const std::function<void(int)>& fn) = 0;
1103 };
1104 
1105 namespace internal {
1106 
1107 // Define alias of ThreadRunner factory function type
1108 using threadrunner_factory =
1109  std::function<std::unique_ptr<ThreadRunnerBase>(int)>;
1110 
1111 typedef void(Function)(State&);
1112 
1113 // ------------------------------------------------------
1114 // Benchmark registration object. The BENCHMARK() macro expands
1115 // into an internal::Benchmark* object. Various methods can
1116 // be called on this object to change the properties of the benchmark.
1117 // Each method returns "this" so that multiple method calls can
1118 // be chained into one expression.
1119 class BENCHMARK_EXPORT Benchmark {
1120  public:
1121  virtual ~Benchmark();
1122 
1123  // Note: the following methods all return "this" so that multiple
1124  // method calls can be chained together in one expression.
1125 
1126  // Specify the name of the benchmark
1127  Benchmark* Name(const std::string& name);
1128 
1129  // Run this benchmark once with "x" as the extra argument passed
1130  // to the function.
1131  // REQUIRES: The function passed to the constructor must accept an arg1.
1132  Benchmark* Arg(int64_t x);
1133 
1134  // Run this benchmark with the given time unit for the generated output report
1135  Benchmark* Unit(TimeUnit unit);
1136 
1137  // Run this benchmark once for a number of values picked from the
1138  // range [start..limit]. (start and limit are always picked.)
1139  // REQUIRES: The function passed to the constructor must accept an arg1.
1140  Benchmark* Range(int64_t start, int64_t limit);
1141 
1142  // Run this benchmark once for all values in the range [start..limit] with
1143  // specific step
1144  // REQUIRES: The function passed to the constructor must accept an arg1.
1145  Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
1146 
1147  // Run this benchmark once with "args" as the extra arguments passed
1148  // to the function.
1149  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1150  Benchmark* Args(const std::vector<int64_t>& args);
1151 
1152  // Equivalent to Args({x, y})
1153  // NOTE: This is a legacy C++03 interface provided for compatibility only.
1154  // New code should use 'Args'.
1155  Benchmark* ArgPair(int64_t x, int64_t y) {
1156  std::vector<int64_t> args;
1157  args.push_back(x);
1158  args.push_back(y);
1159  return Args(args);
1160  }
1161 
1162  // Run this benchmark once for a number of values picked from the
1163  // ranges [start..limit]. (starts and limits are always picked.)
1164  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1165  Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t>>& ranges);
1166 
1167  // Run this benchmark once for each combination of values in the (cartesian)
1168  // product of the supplied argument lists.
1169  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1170  Benchmark* ArgsProduct(const std::vector<std::vector<int64_t>>& arglists);
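  //
  // For example, the following sketch runs BM_SetInsert (from the usage
  // comment at the top of this file) over the full cartesian product of the
  // two lists, i.e. 2 x 2 = 4 argument pairs:
  //
  //   BENCHMARK(BM_SetInsert)->ArgsProduct({{1<<10, 8<<10}, {128, 512}});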
1171 
1172  // Equivalent to ArgNames({name})
1173  Benchmark* ArgName(const std::string& name);
1174 
1175  // Set the argument names to display in the benchmark name. If not called,
1176  // only argument values will be shown.
1177  Benchmark* ArgNames(const std::vector<std::string>& names);
1178 
1179  // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
1180  // NOTE: This is a legacy C++03 interface provided for compatibility only.
1181  // New code should use 'Ranges'.
1182  Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
1183  std::vector<std::pair<int64_t, int64_t>> ranges;
1184  ranges.push_back(std::make_pair(lo1, hi1));
1185  ranges.push_back(std::make_pair(lo2, hi2));
1186  return Ranges(ranges);
1187  }
1188 
1189  // Have "setup" and/or "teardown" invoked once for every benchmark run.
1190  // If the benchmark is multi-threaded (will run in k threads concurrently),
1191  // the setup callback will be invoked exactly once (not k times) before
1192  // each run with k threads. Time allowing (e.g. for a short benchmark), there
1193  // may be multiple such runs per benchmark, each run with its own
1194  // "setup"/"teardown".
1195  //
1196  // If the benchmark uses different size groups of threads (e.g. via
1197  // ThreadRange), the above will be true for each size group.
1198  //
1199  // The callback will be passed a State object, which includes the number
1200  // of threads, thread-index, benchmark arguments, etc.
1201  Benchmark* Setup(callback_function&&);
1202  Benchmark* Setup(const callback_function&);
1203  Benchmark* Teardown(callback_function&&);
1204  Benchmark* Teardown(const callback_function&);
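  //
  // Example sketch (the callback bodies and the benchmark name are
  // illustrative):
  //
  //   static void DoSetup(const benchmark::State& state) { /* acquire resources */ }
  //   static void DoTeardown(const benchmark::State& state) { /* release them */ }
  //   BENCHMARK(BM_WithResources)->Setup(DoSetup)->Teardown(DoTeardown)->Threads(8);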
1205 
1206  // Pass this benchmark object to *func, which can customize
1207  // the benchmark by calling various methods like Arg, Args,
1208  // Threads, etc.
1209  Benchmark* Apply(void (*custom_arguments)(Benchmark* benchmark));
1210 
1211  // Set the range multiplier for non-dense range. If not called, the range
1212  // multiplier kRangeMultiplier will be used.
1213  Benchmark* RangeMultiplier(int multiplier);
1214 
1215  // Set the minimum amount of time to use when running this benchmark. This
1216  // option overrides the `benchmark_min_time` flag.
1217  // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
1218  Benchmark* MinTime(double t);
1219 
1220  // Set the minimum amount of time to run the benchmark before taking runtimes
1221  // of this benchmark into account. This
1222  // option overrides the `benchmark_min_warmup_time` flag.
1223  // REQUIRES: `t >= 0` and `Iterations` has not been called on this benchmark.
1224  Benchmark* MinWarmUpTime(double t);
1225 
1226  // Specify the number of iterations that should be run by this benchmark.
1227  // This option overrides the `benchmark_min_time` flag.
1228  // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
1229  //
1230  // NOTE: This function should only be used when *exact* iteration control is
1231  // needed and never to control or limit how long a benchmark runs, where
1232  // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead.
1233  Benchmark* Iterations(IterationCount n);
1234 
1235  // Specify the number of times to repeat this benchmark. This option overrides
1236  // the `benchmark_repetitions` flag.
1237  // REQUIRES: `n > 0`
1238  Benchmark* Repetitions(int n);
1239 
1240  // Specify if each repetition of the benchmark should be reported separately
1241  // or if only the final statistics should be reported. If the benchmark
1242  // is not repeated then the single result is always reported.
1243  // Applies to *ALL* reporters (display and file).
1244  Benchmark* ReportAggregatesOnly(bool value = true);
1245 
1246  // Same as ReportAggregatesOnly(), but applies to display reporter only.
1247  Benchmark* DisplayAggregatesOnly(bool value = true);
1248 
1249  // By default, the CPU time is measured only for the main thread, which may
1250  // be unrepresentative if the benchmark uses threads internally. If called,
1251  // the total CPU time spent by all the threads will be measured instead.
1253  Benchmark* MeasureProcessCPUTime();
1254 
1255  // If a particular benchmark should use the Wall clock instead of the CPU time
1256  // (be it either the CPU time of the main thread only (default), or the
1257  // total CPU usage of the benchmark), call this method. If called, the elapsed
1258  // (wall) time will be used to control how many iterations are run, and in the
1259  // printing of items/second or MB/seconds values.
1260  // If not called, the CPU time used by the benchmark will be used.
1261  Benchmark* UseRealTime();
1262 
1263  // If a benchmark must measure time manually (e.g. if GPU execution time
1264  // is being measured), call this method. If called, each benchmark
1265  // iteration should call SetIterationTime(seconds) to report the measured
1266  // time, which will be used to control how many iterations are run, and in
1267  // the printing of items/second or MB/second values.
1270  Benchmark* UseManualTime();
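  //
  // Manual-timing sketch (requires <chrono>; the timed work is illustrative):
  //
  //   static void BM_ManuallyTimed(benchmark::State& state) {
  //     for (auto _ : state) {
  //       auto start = std::chrono::high_resolution_clock::now();
  //       DoTimedWork();  // hypothetical, e.g. wait for a GPU kernel
  //       auto end = std::chrono::high_resolution_clock::now();
  //       state.SetIterationTime(
  //           std::chrono::duration<double>(end - start).count());
  //     }
  //   }
  //   BENCHMARK(BM_ManuallyTimed)->UseManualTime();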
1271 
1272  // Set the asymptotic computational complexity for the benchmark. If called
1273  // the asymptotic computational complexity will be shown on the output.
1274  Benchmark* Complexity(BigO complexity = benchmark::oAuto);
1275 
1276  // Set the asymptotic computational complexity for the benchmark. If called
1277  // the asymptotic computational complexity will be shown on the output.
1278  Benchmark* Complexity(BigOFunc* complexity);
1279 
1280  // Add this statistic to be computed over all the values of the benchmark run.
1281  Benchmark* ComputeStatistics(const std::string& name,
1282  StatisticsFunc* statistics,
1283  StatisticUnit unit = kTime);
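  //
  // For example, a sketch that reports the maximum over 10 repetitions as an
  // extra aggregate (requires <algorithm>; BM_Something is a placeholder):
  //
  //   double MyMax(const std::vector<double>& v) {
  //     return *std::max_element(v.begin(), v.end());
  //   }
  //   BENCHMARK(BM_Something)->Repetitions(10)->ComputeStatistics("max", MyMax);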
1284 
1285  // Support for running multiple copies of the same benchmark concurrently
1286  // in multiple threads. This may be useful when measuring the scaling
1287  // of some piece of code.
1288 
1289  // Run one instance of this benchmark concurrently in t threads.
1290  Benchmark* Threads(int t);
1291 
1292  // Pick a set of values T from [min_threads,max_threads].
1293  // min_threads and max_threads are always included in T. Run this
1294  // benchmark once for each value in T. The benchmark run for a
1295  // particular value t consists of t threads running the benchmark
1296  // function concurrently. For example, consider:
1297  // BENCHMARK(Foo)->ThreadRange(1,16);
1298  // This will run the following benchmarks:
1299  // Foo in 1 thread
1300  // Foo in 2 threads
1301  // Foo in 4 threads
1302  // Foo in 8 threads
1303  // Foo in 16 threads
1304  Benchmark* ThreadRange(int min_threads, int max_threads);
1305 
1306  // For each value n in the range, run this benchmark once using n threads.
1307  // min_threads and max_threads are always included in the range.
1308  // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
1309  // a benchmark with 1, 4, 7 and 8 threads.
1310  Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
1311 
1312  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
1313  Benchmark* ThreadPerCpu();
1314 
1315  // Sets a user-defined thread runner factory (see ThreadRunnerBase).
1316  Benchmark* ThreadRunner(threadrunner_factory&& factory);
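  //
  // A sketch of a std::thread-based runner (requires <thread> and <vector>).
  // The assumed protocol (RunThreads() invokes 'fn' once per thread index and
  // waits for all threads to finish) is an illustration, not a guarantee:
  //
  //   struct StdThreadRunner : benchmark::ThreadRunnerBase {
  //     explicit StdThreadRunner(int num_threads) : n(num_threads) {}
  //     void RunThreads(const std::function<void(int)>& fn) override {
  //       std::vector<std::thread> pool;
  //       for (int i = 0; i < n; ++i) pool.emplace_back(fn, i);
  //       for (auto& t : pool) t.join();
  //     }
  //     int n;
  //   };
  //
  //   BENCHMARK(BM_MultiThreaded)->Threads(4)->ThreadRunner(
  //       [](int num_threads) -> std::unique_ptr<benchmark::ThreadRunnerBase> {
  //         return std::unique_ptr<benchmark::ThreadRunnerBase>(
  //             new StdThreadRunner(num_threads));
  //       });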
1317 
1318  virtual void Run(State& state) = 0;
1319 
1320  TimeUnit GetTimeUnit() const;
1321 
1322  protected:
1323  explicit Benchmark(const std::string& name);
1324  void SetName(const std::string& name);
1325 
1326  public:
1327  const char* GetName() const;
1328  int ArgsCnt() const;
1329  const char* GetArgName(int arg) const;
1330 
1331  private:
1332  friend class BenchmarkFamilies;
1333  friend class BenchmarkInstance;
1334 
1335  std::string name_;
1336  AggregationReportMode aggregation_report_mode_;
1337  std::vector<std::string> arg_names_; // Args for all benchmark runs
1338  std::vector<std::vector<int64_t>> args_; // Args for all benchmark runs
1339 
1340  TimeUnit time_unit_;
1341  bool use_default_time_unit_;
1342 
1343  int range_multiplier_;
1344  double min_time_;
1345  double min_warmup_time_;
1346  IterationCount iterations_;
1347  int repetitions_;
1348  bool measure_process_cpu_time_;
1349  bool use_real_time_;
1350  bool use_manual_time_;
1351  BigO complexity_;
1352  BigOFunc* complexity_lambda_;
1353  std::vector<Statistics> statistics_;
1354  std::vector<int> thread_counts_;
1355 
1356  callback_function setup_;
1357  callback_function teardown_;
1358 
1359  threadrunner_factory threadrunner_;
1360 
1361  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(Benchmark);
1362 };
1363 
1364 } // namespace internal
1365 
1366 // Create and register a benchmark with the specified 'name' that invokes
1367 // the specified functor 'fn'.
1368 //
1369 // RETURNS: A pointer to the registered benchmark.
1370 internal::Benchmark* RegisterBenchmark(const std::string& name,
1371  internal::Function* fn);
1372 
1373 template <class Lambda>
1374 internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn);
1375 
1376 // Remove all registered benchmarks. All pointers to previously registered
1377 // benchmarks are invalidated.
1378 BENCHMARK_EXPORT void ClearRegisteredBenchmarks();
1379 
1380 namespace internal {
1381 // The class used to hold all Benchmarks created from static functions
1382 // (i.e. those created using the BENCHMARK(...) macros).
1383 class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
1384  public:
1385  FunctionBenchmark(const std::string& name, Function* func)
1386  : Benchmark(name), func_(func) {}
1387 
1388  void Run(State& st) override;
1389 
1390  private:
1391  Function* func_;
1392 };
1393 
1394 template <class Lambda>
1395 class LambdaBenchmark : public Benchmark {
1396  public:
1397  void Run(State& st) override { lambda_(st); }
1398 
1399  template <class OLambda>
1400  LambdaBenchmark(const std::string& name, OLambda&& lam)
1401  : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
1402 
1403  private:
1404  LambdaBenchmark(LambdaBenchmark const&) = delete;
1405  Lambda lambda_;
1406 };
1407 } // namespace internal
1408 
1409 inline internal::Benchmark* RegisterBenchmark(const std::string& name,
1410  internal::Function* fn) {
1411  return internal::RegisterBenchmarkInternal(
1412  ::benchmark::internal::make_unique<internal::FunctionBenchmark>(name,
1413  fn));
1414 }
1415 
1416 template <class Lambda>
1417 internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) {
1418  using BenchType =
1419  internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
1420  return internal::RegisterBenchmarkInternal(
1421  ::benchmark::internal::make_unique<BenchType>(name,
1422  std::forward<Lambda>(fn)));
1423 }
1424 
1425 template <class Lambda, class... Args>
1426 internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn,
1427  Args&&... args) {
1428  return benchmark::RegisterBenchmark(
1429  name, [=](benchmark::State& st) { fn(st, args...); });
1430 }
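//
// Example sketch of programmatic registration, typically done in main() before
// RunSpecifiedBenchmarks(); the loop, the name scheme and ProcessBuffer() are
// illustrative:
//
//   for (int len : {1, 8, 64}) {
//     benchmark::RegisterBenchmark(
//         "BM_Dynamic/" + std::to_string(len), [len](benchmark::State& st) {
//           for (auto _ : st) ProcessBuffer(len);  // hypothetical work
//         });
//   }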
1431 
1432 // The base class for all fixture tests.
1433 class BENCHMARK_EXPORT Fixture : public internal::Benchmark {
1434  public:
1435  Fixture() : internal::Benchmark("") {}
1436 
1437  void Run(State& st) override {
1438  this->SetUp(st);
1439  this->BenchmarkCase(st);
1440  this->TearDown(st);
1441  }
1442 
1443  // These will be deprecated ...
1444  virtual void SetUp(const State&) {}
1445  virtual void TearDown(const State&) {}
1446  // ... In favor of these.
1447  virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
1448  virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
1449 
1450  protected:
1451  virtual void BenchmarkCase(State&) = 0;
1452 };
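//
// Example sketch of a fixture-based benchmark. BENCHMARK_F is one of the
// fixture registration macros defined further down in this header; the fixture
// and case names are illustrative:
//
//   class MyFixture : public benchmark::Fixture {
//    public:
//     void SetUp(benchmark::State& state) override { /* per-run setup */ }
//     void TearDown(benchmark::State& state) override { /* per-run teardown */ }
//   };
//
//   BENCHMARK_F(MyFixture, FooCase)(benchmark::State& st) {
//     for (auto _ : st) {
//       // measured work
//     }
//   }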
1453 } // namespace benchmark
1454 
1455 // ------------------------------------------------------
1456 // Macro to register benchmarks
1457 
1458 // Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
1459 // every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
1460 // empty. If X is empty the expression becomes (+1 == +0).
1461 #if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
1462 #define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
1463 #else
1464 #define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
1465 #endif
1466 
1467 // Helpers for generating unique variable names
1468 #define BENCHMARK_PRIVATE_NAME(...) \
1469  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \
1470  __VA_ARGS__)
1471 
1472 #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
1473 #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
1474 // Helper for concatenation with macro name expansion
1475 #define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
1476  BaseClass##_##Method##_Benchmark
1477 
1478 #define BENCHMARK_PRIVATE_DECLARE(n) \
1479  /* NOLINTNEXTLINE(misc-use-anonymous-namespace) */ \
1480  static ::benchmark::internal::Benchmark const* const BENCHMARK_PRIVATE_NAME( \
1481  n) BENCHMARK_UNUSED
1482 
1483 #define BENCHMARK(...) \
1484  BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
1485  (::benchmark::internal::RegisterBenchmarkInternal( \
1486  ::benchmark::internal::make_unique< \
1487  ::benchmark::internal::FunctionBenchmark>(#__VA_ARGS__, \
1488  __VA_ARGS__)))
1489 
1490 // Old-style macros
1491 #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
1492 #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
1493 #define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
1494 #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
1495 #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
1496  BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
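
// For example, each old-style macro is shorthand for the corresponding
// builder call on a registered benchmark; "BM_Foo" below is a placeholder:
//
//   BENCHMARK_RANGE(BM_Foo, 8, 8 << 10);
//   // is equivalent to
//   BENCHMARK(BM_Foo)->Range(8, 8 << 10);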
1497 
1498 // Register a benchmark which invokes the function specified by `func`
1499 // with the additional arguments specified by `...`.
1500 //
1501 // For example:
1502 //
1503 // template <class ...ExtraArgs>
1504 // void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
1505 //   [...]
1506 // }
1507 // /* Registers a benchmark named "BM_takes_args/int_string_test" */
1508 // BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
1509 #define BENCHMARK_CAPTURE(func, test_case_name, ...) \
1510  BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
1511  (::benchmark::internal::RegisterBenchmarkInternal( \
1512  ::benchmark::internal::make_unique< \
1513  ::benchmark::internal::FunctionBenchmark>( \
1514  #func "/" #test_case_name, \
1515  [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1516 
1517 // This will register a benchmark for a templatized function. For example:
1518 //
1519 // template<int arg>
1520 // void BM_Foo(benchmark::State& state);
1521 //
1522 // BENCHMARK_TEMPLATE(BM_Foo, 1);
1523 //
1524 // will register BM_Foo<1> as a benchmark.
1525 #define BENCHMARK_TEMPLATE1(n, a) \
1526  BENCHMARK_PRIVATE_DECLARE(n) = \
1527  (::benchmark::internal::RegisterBenchmarkInternal( \
1528  ::benchmark::internal::make_unique< \
1529  ::benchmark::internal::FunctionBenchmark>(#n "<" #a ">", n<a>)))
1530 
1531 #define BENCHMARK_TEMPLATE2(n, a, b) \
1532  BENCHMARK_PRIVATE_DECLARE(n) = \
1533  (::benchmark::internal::RegisterBenchmarkInternal( \
1534  ::benchmark::internal::make_unique< \
1535  ::benchmark::internal::FunctionBenchmark>(#n "<" #a "," #b ">", \
1536  n<a, b>)))
1537 
1538 #define BENCHMARK_TEMPLATE(n, ...) \
1539  BENCHMARK_PRIVATE_DECLARE(n) = \
1540  (::benchmark::internal::RegisterBenchmarkInternal( \
1541  ::benchmark::internal::make_unique< \
1542  ::benchmark::internal::FunctionBenchmark>( \
1543  #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
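
// For example, BENCHMARK_TEMPLATE also accepts multiple template arguments.
// An illustrative sketch; "BM_Sequential" is a placeholder, not library code:
//
//   template <class Container, class Value>
//   void BM_Sequential(benchmark::State& state);
//
//   BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int);
//   // registers a benchmark named "BM_Sequential<std::vector<int>, int>"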
1544 
1545 // This will register a benchmark for a templatized function,
1546 // with the additional arguments specified by `...`.
1547 //
1548 // For example:
1549 //
1550 // template <typename T, class ...ExtraArgs>
1551 // void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
1552 //   [...]
1553 // }
1554 // /* Registers a benchmark named "BM_takes_args<void>/int_string_test" */
1555 // BENCHMARK_TEMPLATE1_CAPTURE(BM_takes_args, void, int_string_test, 42,
1556 // std::string("abc"));
1557 #define BENCHMARK_TEMPLATE1_CAPTURE(func, a, test_case_name, ...) \
1558  BENCHMARK_CAPTURE(func<a>, test_case_name, __VA_ARGS__)
1559 
1560 #define BENCHMARK_TEMPLATE2_CAPTURE(func, a, b, test_case_name, ...) \
1561  BENCHMARK_PRIVATE_DECLARE(func) = \
1562  (::benchmark::internal::RegisterBenchmarkInternal( \
1563  ::benchmark::internal::make_unique< \
1564  ::benchmark::internal::FunctionBenchmark>( \
1565  #func "<" #a "," #b ">" \
1566  "/" #test_case_name, \
1567  [](::benchmark::State& st) { func<a, b>(st, __VA_ARGS__); })))
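
// For example (an illustrative sketch; "BM_Pair" is a placeholder):
//
//   template <class T, class U, class ...ExtraArgs>
//   void BM_Pair(benchmark::State& state, ExtraArgs&&... extra_args);
//
//   /* Registers a benchmark named "BM_Pair<int,long>/with_42" */
//   BENCHMARK_TEMPLATE2_CAPTURE(BM_Pair, int, long, with_42, 42);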
1568 
1569 #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1570  class BaseClass##_##Method##_Benchmark : public BaseClass { \
1571  public: \
1572  BaseClass##_##Method##_Benchmark() { \
1573  this->SetName(#BaseClass "/" #Method); \
1574  } \
1575  \
1576  protected: \
1577  void BenchmarkCase(::benchmark::State&) override; \
1578  };
1579 
1580 #define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1581  class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
1582  public: \
1583  BaseClass##_##Method##_Benchmark() { \
1584  this->SetName(#BaseClass "<" #a ">/" #Method); \
1585  } \
1586  \
1587  protected: \
1588  void BenchmarkCase(::benchmark::State&) override; \
1589  };
1590 
1591 #define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1592  class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
1593  public: \
1594  BaseClass##_##Method##_Benchmark() { \
1595  this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
1596  } \
1597  \
1598  protected: \
1599  void BenchmarkCase(::benchmark::State&) override; \
1600  };
1601 
1602 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \
1603  class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
1604  public: \
1605  BaseClass##_##Method##_Benchmark() { \
1606  this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \
1607  } \
1608  \
1609  protected: \
1610  void BenchmarkCase(::benchmark::State&) override; \
1611  };
1612 
1613 #define BENCHMARK_DEFINE_F(BaseClass, Method) \
1614  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1615  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1616 
1617 #define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \
1618  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1619  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1620 
1621 #define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \
1622  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1623  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1624 
1625 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \
1626  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1627  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1628 
1629 #define BENCHMARK_REGISTER_F(BaseClass, Method) \
1630  BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))
1631 
1632 #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
1633  BENCHMARK_PRIVATE_DECLARE(TestName) = \
1634  (::benchmark::internal::RegisterBenchmarkInternal( \
1635  ::benchmark::internal::make_unique<TestName>()))
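
// For example, BENCHMARK_DEFINE_F defines the fixture method and
// BENCHMARK_REGISTER_F registers it, which allows further configuration.
// A hedged sketch; "MyFixture" and the chosen arguments are placeholders:
//
//   BENCHMARK_DEFINE_F(MyFixture, InsertTest)(benchmark::State& st) {
//     for (auto _ : st) { /* code under test */ }
//   }
//   BENCHMARK_REGISTER_F(MyFixture, InsertTest)->Arg(512)->Threads(2);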
1636 
1637 #define BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F(BaseClass, Method) \
1638  BaseClass##_##Method##_BenchmarkTemplate
1639 
1640 #define BENCHMARK_TEMPLATE_METHOD_F(BaseClass, Method) \
1641  template <class... Args> \
1642  class BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F(BaseClass, Method) \
1643  : public BaseClass<Args...> { \
1644  protected: \
1645  using Base = BaseClass<Args...>; \
1646  void BenchmarkCase(::benchmark::State&) override; \
1647  }; \
1648  template <class... Args> \
1649  void BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F( \
1650  BaseClass, Method)<Args...>::BenchmarkCase
1651 
1652 #define BENCHMARK_TEMPLATE_PRIVATE_INSTANTIATE_F(BaseClass, Method, \
1653  UniqueName, ...) \
1654  class UniqueName : public BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F( \
1655  BaseClass, Method)<__VA_ARGS__> { \
1656  public: \
1657  UniqueName() { this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); } \
1658  }; \
1659  BENCHMARK_PRIVATE_DECLARE(BaseClass##_##Method##_Benchmark) = \
1660  (::benchmark::internal::RegisterBenchmarkInternal( \
1661  ::benchmark::internal::make_unique<UniqueName>()))
1662 
1663 #define BENCHMARK_TEMPLATE_INSTANTIATE_F(BaseClass, Method, ...) \
1664  BENCHMARK_TEMPLATE_PRIVATE_INSTANTIATE_F( \
1665  BaseClass, Method, BENCHMARK_PRIVATE_NAME(BaseClass##Method), \
1666  __VA_ARGS__)
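
// For example, a templated fixture method can be written once with
// BENCHMARK_TEMPLATE_METHOD_F and instantiated for several types with
// BENCHMARK_TEMPLATE_INSTANTIATE_F. A sketch; "MyTemplatedFixture" is a
// placeholder:
//
//   template <typename T>
//   class MyTemplatedFixture : public benchmark::Fixture {};
//
//   BENCHMARK_TEMPLATE_METHOD_F(MyTemplatedFixture, Sum)(benchmark::State& st) {
//     for (auto _ : st) { /* code under test */ }
//   }
//   BENCHMARK_TEMPLATE_INSTANTIATE_F(MyTemplatedFixture, Sum, int);
//   BENCHMARK_TEMPLATE_INSTANTIATE_F(MyTemplatedFixture, Sum, double);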
1667 
1668 // This macro will define and register a benchmark within a fixture class.
1669 #define BENCHMARK_F(BaseClass, Method) \
1670  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1671  BENCHMARK_REGISTER_F(BaseClass, Method); \
1672  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1673 
1674 #define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \
1675  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1676  BENCHMARK_REGISTER_F(BaseClass, Method); \
1677  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1678 
1679 #define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \
1680  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1681  BENCHMARK_REGISTER_F(BaseClass, Method); \
1682  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1683 
1684 #define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \
1685  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1686  BENCHMARK_REGISTER_F(BaseClass, Method); \
1687  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
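
// For example, BENCHMARK_TEMPLATE_F defines and registers a benchmark on a
// templated fixture in one step. A sketch; "TypedFixture" is a placeholder:
//
//   template <typename T>
//   class TypedFixture : public benchmark::Fixture {};
//
//   BENCHMARK_TEMPLATE_F(TypedFixture, IntCase, int)(benchmark::State& st) {
//     for (auto _ : st) { /* code under test */ }
//   }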
1688 
1689 // Helper macro to create a main routine in a test that runs the benchmarks
1690 // Note the workaround for Hexagon simulator passing argc != 0, argv = NULL.
1691 #define BENCHMARK_MAIN() \
1692  int main(int argc, char** argv) { \
1693  benchmark::MaybeReenterWithoutASLR(argc, argv); \
1694  char arg0_default[] = "benchmark"; \
1695  char* args_default = reinterpret_cast<char*>(arg0_default); \
1696  if (!argv) { \
1697  argc = 1; \
1698  argv = &args_default; \
1699  } \
1700  ::benchmark::Initialize(&argc, argv); \
1701  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
1702  ::benchmark::RunSpecifiedBenchmarks(); \
1703  ::benchmark::Shutdown(); \
1704  return 0; \
1705  } \
1706  int main(int, char**)
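
// Typical usage is to place BENCHMARK_MAIN(); once at the end of a benchmark
// translation unit instead of writing main() by hand, e.g. ("BM_Foo" is a
// placeholder benchmark):
//
//   BENCHMARK(BM_Foo);
//   BENCHMARK_MAIN();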
1707 
1708 // ------------------------------------------------------
1709 // Benchmark Reporters
1710 
1711 namespace benchmark {
1712 
1713 struct BENCHMARK_EXPORT CPUInfo {
1714  struct CacheInfo {
1715  std::string type;
1716  int level;
1717  int size;
1718  int num_sharing;
1719  };
1720 
1721  enum Scaling { UNKNOWN, ENABLED, DISABLED };
1722 
1723  int num_cpus;
1724  Scaling scaling;
1725  double cycles_per_second;
1726  std::vector<CacheInfo> caches;
1727  std::vector<double> load_avg;
1728 
1729  static const CPUInfo& Get();
1730 
1731  private:
1732  CPUInfo();
1733  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
1734 };
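
// The host CPU description is available through the singleton accessor, e.g.
// (an illustrative sketch):
//
//   const benchmark::CPUInfo& cpu = benchmark::CPUInfo::Get();
//   // cpu.num_cpus, cpu.cycles_per_second and cpu.caches describe the host.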
1735 
1736 // System information gathered for reporting (host name and ASLR status).
1737 struct BENCHMARK_EXPORT SystemInfo {
1738  enum class ASLR { UNKNOWN, ENABLED, DISABLED };
1739 
1740  std::string name;
1741  ASLR ASLRStatus;
1742  static const SystemInfo& Get();
1743 
1744  private:
1745  SystemInfo();
1746  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
1747 };
1748 
1749 // BenchmarkName contains the components of the Benchmark's name
1750 // which allows individual fields to be modified or cleared before
1751 // building the final name using 'str()'.
1752 struct BENCHMARK_EXPORT BenchmarkName {
1753  std::string function_name;
1754  std::string args;
1755  std::string min_time;
1756  std::string min_warmup_time;
1757  std::string iterations;
1758  std::string repetitions;
1759  std::string time_type;
1760  std::string threads;
1761 
1762  // Return the full name of the benchmark with each non-empty
1763  // field separated by a '/'
1764  std::string str() const;
1765 };
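
// For example, str() joins the non-empty fields in order, so a name with
// function_name "BM_Foo", args "8/64" and threads "threads:2" (illustrative
// values) would render as "BM_Foo/8/64/threads:2".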
1766 
1767 // Interface for custom benchmark result printers.
1768 // By default, benchmark reports are printed to stdout. However, an application
1769 // can control the destination of the reports by calling
1770 // RunSpecifiedBenchmarks and passing it a custom reporter object.
1771 // The reporter object must implement the following interface.
1772 class BENCHMARK_EXPORT BenchmarkReporter {
1773  public:
1774  struct Context {
1775  CPUInfo const& cpu_info;
1776  SystemInfo const& sys_info;
1777  // The number of chars in the longest benchmark name.
1778  size_t name_field_width = 0;
1779  static const char* executable_name;
1780  Context();
1781  };
1782 
1783  struct BENCHMARK_EXPORT Run {
1784  static const int64_t no_repetition_index = -1;
1785  enum RunType { RT_Iteration, RT_Aggregate };
1786 
1787  Run()
1788  : run_type(RT_Iteration),
1789  aggregate_unit(kTime),
1790  skipped(internal::NotSkipped),
1791  iterations(1),
1792  threads(1),
1793  time_unit(GetDefaultTimeUnit()),
1794  real_accumulated_time(0),
1795  cpu_accumulated_time(0),
1796  max_heapbytes_used(0),
1797  use_real_time_for_initial_big_o(false),
1798  complexity(oNone),
1799  complexity_lambda(),
1800  complexity_n(0),
1801  report_big_o(false),
1802  report_rms(false),
1803  allocs_per_iter(0.0) {}
1804 
1805  std::string benchmark_name() const;
1806  BenchmarkName run_name;
1807  int64_t family_index;
1808  int64_t per_family_instance_index;
1809  RunType run_type;
1810  std::string aggregate_name;
1811  StatisticUnit aggregate_unit;
1812  std::string report_label; // Empty if not set by benchmark.
1813  internal::Skipped skipped;
1814  std::string skip_message;
1815 
1816  IterationCount iterations;
1817  int64_t threads;
1818  int64_t repetition_index;
1819  int64_t repetitions;
1820  TimeUnit time_unit;
1821  double real_accumulated_time;
1822  double cpu_accumulated_time;
1823 
1824  // Return a value representing the real time per iteration in the unit
1825  // specified by 'time_unit'.
1826  // NOTE: If 'iterations' is zero the returned value represents the
1827  // accumulated time.
1828  double GetAdjustedRealTime() const;
1829 
1830  // Return a value representing the cpu time per iteration in the unit
1831  // specified by 'time_unit'.
1832  // NOTE: If 'iterations' is zero the returned value represents the
1833  // accumulated time.
1834  double GetAdjustedCPUTime() const;
1835 
1836  // This is set to 0.0 if memory tracing is not enabled.
1837  double max_heapbytes_used;
1838 
1839  // By default Big-O is computed for CPU time, but that is not what you want
1840  // to happen when manual time was requested, which is stored as real time.
1841  bool use_real_time_for_initial_big_o;
1842 
1843  // Keep track of arguments to compute asymptotic complexity
1844  BigO complexity;
1845  BigOFunc* complexity_lambda;
1846  ComplexityN complexity_n;
1847 
1848  // what statistics to compute from the measurements
1849  const std::vector<internal::Statistics>* statistics;
1850 
1851  // Inform print function whether the current run is a complexity report
1852  bool report_big_o;
1853  bool report_rms;
1854 
1855  UserCounters counters;
1856 
1857  // Memory metrics.
1858  MemoryManager::Result memory_result;
1859  double allocs_per_iter;
1860  };
1861 
1862  struct PerFamilyRunReports {
1863  PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}
1864 
1865  // How many runs will all instances of this benchmark perform?
1866  int num_runs_total;
1867 
1868  // How many runs have happened already?
1869  int num_runs_done;
1870 
1871  // The reports about (non-erroneous!) runs of this family.
1872  std::vector<BenchmarkReporter::Run> Runs;
1873  };
1874 
1875  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
1876  // and the error stream set to 'std::cerr'
1877  BenchmarkReporter();
1878 
1879  // Called once for every suite of benchmarks run.
1880  // The parameter "context" contains information that the
1881  // reporter may wish to use when generating its report, for example the
1882  // platform under which the benchmarks are running. The benchmark run is
1883  // never started if this function returns false, allowing the reporter
1884  // to skip runs based on the context information.
1885  virtual bool ReportContext(const Context& context) = 0;
1886 
1887  // Called once for each group of benchmark runs, gives information about
1888  // the configurations of the runs.
1889  virtual void ReportRunsConfig(double /*min_time*/,
1890  bool /*has_explicit_iters*/,
1891  IterationCount /*iters*/) {}
1892 
1893  // Called once for each group of benchmark runs, gives information about
1894  // cpu-time and heap memory usage during the benchmark run. If the group
1895  // of runs contained more than two entries then 'report' contains additional
1896  // elements representing the mean and standard deviation of those runs.
1897  // Additionally if this group of runs was the last in a family of benchmarks
1898  // 'report' contains additional entries representing the asymptotic
1899  // complexity and RMS of that benchmark family.
1900  virtual void ReportRuns(const std::vector<Run>& report) = 0;
1901 
1902  // Called once and only once after every group of benchmarks is run and
1903  // reported.
1904  virtual void Finalize() {}
1905 
1906  // REQUIRES: The object referenced by 'out' is valid for the lifetime
1907  // of the reporter.
1908  void SetOutputStream(std::ostream* out) {
1909  assert(out);
1910  output_stream_ = out;
1911  }
1912 
1913  // REQUIRES: The object referenced by 'err' is valid for the lifetime
1914  // of the reporter.
1915  void SetErrorStream(std::ostream* err) {
1916  assert(err);
1917  error_stream_ = err;
1918  }
1919 
1920  std::ostream& GetOutputStream() const { return *output_stream_; }
1921 
1922  std::ostream& GetErrorStream() const { return *error_stream_; }
1923 
1924  virtual ~BenchmarkReporter();
1925 
1926  // Write a human readable string to 'out' representing the specified
1927  // 'context'.
1928  // REQUIRES: 'out' is non-null.
1929  static void PrintBasicContext(std::ostream* out, Context const& context);
1930 
1931  private:
1932  std::ostream* output_stream_;
1933  std::ostream* error_stream_;
1934 };
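
// A custom reporter only needs to implement ReportContext() and ReportRuns().
// A minimal sketch; "NullReporter" is a placeholder, not part of the library:
//
//   class NullReporter : public benchmark::BenchmarkReporter {
//    public:
//     bool ReportContext(const Context&) override { return true; }
//     void ReportRuns(const std::vector<Run>&) override {}
//   };
//
//   NullReporter rep;
//   benchmark::RunSpecifiedBenchmarks(&rep);  // use it as the display reporter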
1935 
1936 // Simple reporter that outputs benchmark data to the console. This is the
1937 // default reporter used by RunSpecifiedBenchmarks().
1938 class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
1939  public:
1940  enum OutputOptions {
1941  OO_None = 0,
1942  OO_Color = 1,
1943  OO_Tabular = 2,
1944  OO_ColorTabular = OO_Color | OO_Tabular,
1945  OO_Defaults = OO_ColorTabular
1946  };
1947  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
1948  : output_options_(opts_), name_field_width_(0), printed_header_(false) {}
1949 
1950  bool ReportContext(const Context& context) override;
1951  void ReportRuns(const std::vector<Run>& reports) override;
1952 
1953  protected:
1954  virtual void PrintRunData(const Run& result);
1955  virtual void PrintHeader(const Run& run);
1956 
1957  OutputOptions output_options_;
1958  size_t name_field_width_;
1959  UserCounters prev_counters_;
1960  bool printed_header_;
1961 };
1962 
1963 class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter {
1964  public:
1965  JSONReporter() : first_report_(true) {}
1966  bool ReportContext(const Context& context) override;
1967  void ReportRuns(const std::vector<Run>& reports) override;
1968  void Finalize() override;
1969 
1970  private:
1971  void PrintRunData(const Run& run);
1972 
1973  bool first_report_;
1974 };
1975 
1976 class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG(
1977  "The CSV Reporter will be removed in a future release") CSVReporter
1978  : public BenchmarkReporter {
1979  public:
1980  CSVReporter() : printed_header_(false) {}
1981  bool ReportContext(const Context& context) override;
1982  void ReportRuns(const std::vector<Run>& reports) override;
1983 
1984  private:
1985  void PrintRunData(const Run& run);
1986 
1987  bool printed_header_;
1988  std::set<std::string> user_counter_names_;
1989 };
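
// For example, a built-in reporter can be redirected to a file via
// SetOutputStream() before running. An illustrative sketch; "results.json"
// is a placeholder path:
//
//   std::ofstream out("results.json");
//   benchmark::JSONReporter json;
//   json.SetOutputStream(&out);        // 'out' must outlive the run
//   benchmark::RunSpecifiedBenchmarks(&json);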
1990 
1991 inline const char* GetTimeUnitString(TimeUnit unit) {
1992  switch (unit) {
1993  case kSecond:
1994  return "s";
1995  case kMillisecond:
1996  return "ms";
1997  case kMicrosecond:
1998  return "us";
1999  case kNanosecond:
2000  return "ns";
2001  }
2002  BENCHMARK_UNREACHABLE();
2003 }
2004 
2005 inline double GetTimeUnitMultiplier(TimeUnit unit) {
2006  switch (unit) {
2007  case kSecond:
2008  return 1;
2009  case kMillisecond:
2010  return 1e3;
2011  case kMicrosecond:
2012  return 1e6;
2013  case kNanosecond:
2014  return 1e9;
2015  }
2016  BENCHMARK_UNREACHABLE();
2017 }
2018 
2019 // Creates a list of integer values for the given range and multiplier.
2020 // This can be used together with ArgsProduct() to allow multiple ranges
2021 // with different multipliers.
2022 // Example:
2023 // ArgsProduct({
2024 // CreateRange(0, 1024, /*multi=*/32),
2025 // CreateRange(0, 100, /*multi=*/4),
2026 // CreateDenseRange(0, 4, /*step=*/1),
2027 // });
2028 BENCHMARK_EXPORT
2029 std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);
2030 
2031 // Creates a list of integer values for the given range and step.
2032 BENCHMARK_EXPORT
2033 std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step);
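
// For example (illustrative values; "BM_Foo" is a placeholder benchmark):
//
//   CreateRange(8, 128, /*multi=*/2)    // yields {8, 16, 32, 64, 128}
//   CreateDenseRange(1, 4, /*step=*/1)  // yields {1, 2, 3, 4}
//
//   BENCHMARK(BM_Foo)->ArgsProduct({CreateRange(8, 128, /*multi=*/2),
//                                   {1, 2, 3}});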
2034 
2035 } // namespace benchmark
2036 
2037 #if defined(_MSC_VER)
2038 #pragma warning(pop)
2039 #endif
2040 
2041 #endif // BENCHMARK_BENCHMARK_H_