// raw_hashtable_benchmark_helpers.h
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_COMMON_RAW_HASHTABLE_BENCHMARK_HELPERS_H_
  5. #define CARBON_COMMON_RAW_HASHTABLE_BENCHMARK_HELPERS_H_
  6. #include <benchmark/benchmark.h>
  7. #include <sys/types.h>
  8. #include <limits>
  9. #include <map>
  10. #include <vector>
  11. #include "absl/base/no_destructor.h"
  12. #include "absl/random/random.h"
  13. #include "common/check.h"
  14. #include "common/hashing.h"
  15. #include "common/raw_hashtable.h"
  16. #include "llvm/ADT/ArrayRef.h"
  17. #include "llvm/ADT/Sequence.h"
  18. #include "llvm/ADT/SmallVector.h"
  19. #include "llvm/ADT/StringRef.h"
  20. namespace Carbon::RawHashtable {
// We want to support benchmarking with 16M keys plus up to 256 "other" keys
// (for misses). The large number of keys helps check for performance hiccups
// with especially large tables and when missing all levels of cache.
//
// `NumOtherKeys` is the fixed pool of extra keys used for miss benchmarks.
inline constexpr ssize_t NumOtherKeys = 1 << 8;
// Upper bound on total keys: 2^24 main keys plus the miss-key pool.
inline constexpr ssize_t MaxNumKeys = (1 << 24) + NumOtherKeys;
// Get an array of main keys with the given `size`, which must be less than
// 2^24. Also get a miss keys array of `NumOtherKeys` which has no collisions
// with the main keys.
//
// For a given size, this will return the same arrays. This uses unsynchronized
// global state, and so is thread hostile and must not be called before main.
template <typename T>
auto GetKeysAndMissKeys(ssize_t table_keys_size)
    -> std::pair<llvm::ArrayRef<T>, llvm::ArrayRef<T>>;
// Get an array of main keys with the given `size`, which must be less than
// 2^24. Also get a hit keys array of `lookup_keys_size` all of which will
// occur in the main keys array. If the lookup size is larger than the main
// size, the lookup sequence will contain duplicates.
//
// For a given size, this will return the same arrays. This uses unsynchronized
// global state, and so is thread hostile and must not be called before main.
template <typename T>
auto GetKeysAndHitKeys(ssize_t table_keys_size, ssize_t lookup_keys_size)
    -> std::pair<llvm::ArrayRef<T>, llvm::ArrayRef<T>>;
// Dump statistics about hashing the given keys, for diagnosing hash-quality
// issues in benchmarks.
template <typename T>
auto DumpHashStatistics(llvm::ArrayRef<T> keys) -> void;
  48. // Convert values used in hashtable benchmarking to a bool. This is used to form
  49. // dependencies between values stored in the hashtable between benchmark
  50. // iterations.
  51. template <typename T>
  52. auto ValueToBool(T value) -> bool {
  53. if constexpr (std::is_same_v<T, llvm::StringRef>) {
  54. return value.size() > 0;
  55. } else if constexpr (std::is_pointer_v<T>) {
  56. return value != nullptr;
  57. } else {
  58. // We want our keys to include `0` for integers, so use the largest value.
  59. return value != std::numeric_limits<T>::max();
  60. }
  61. }
  62. inline auto SizeArgs(benchmark::internal::Benchmark* b) -> void {
  63. // Benchmarks for "miss" operations only have one parameter -- the size of the
  64. // table. These benchmarks use a fixed `NumOtherKeys` set of extra keys for
  65. // each miss operation.
  66. b->DenseRange(1, 4, 1);
  67. b->Arg(8);
  68. b->Arg(16);
  69. b->Arg(32);
  70. // For sizes >= 64 we first use the power of two which will have a low load
  71. // factor, and then target exactly at our max load factor.
  72. auto large_sizes = {64, 1 << 8, 1 << 12, 1 << 16, 1 << 20, 1 << 24};
  73. for (auto s : large_sizes) {
  74. b->Arg(s);
  75. }
  76. for (auto s : large_sizes) {
  77. b->Arg(s - (s / 8));
  78. }
  79. }
  80. inline auto HitArgs(benchmark::internal::Benchmark* b) -> void {
  81. // There are two parameters for benchmarks of "hit" operations. The first is
  82. // the size of the hashtable itself. The second is the size of a buffer of
  83. // random keys actually in the hashtable to use for the operations.
  84. //
  85. // For small sizes, we use a fixed `NumOtherKeys` lookup key count. This is
  86. // enough to avoid patterns of queries training the branch predictor just from
  87. // the keys themselves, while small enough to avoid significant L1 cache
  88. // pressure.
  89. b->ArgsProduct({benchmark::CreateDenseRange(1, 4, 1), {NumOtherKeys}});
  90. b->Args({8, NumOtherKeys});
  91. b->Args({16, NumOtherKeys});
  92. b->Args({32, NumOtherKeys});
  93. // For sizes >= 64 we first use the power of two which will have a low load
  94. // factor, and then target exactly at our max load factor. Start the sizes
  95. // list off with the powers of two, and the append a version of each power of
  96. // two adjusted down to the load factor. We'll then build the benchmarks from
  97. // these below.
  98. std::vector<ssize_t> large_sizes = {64, 1 << 8, 1 << 12,
  99. 1 << 16, 1 << 20, 1 << 24};
  100. for (auto i : llvm::seq<int>(0, large_sizes.size())) {
  101. ssize_t s = large_sizes[i];
  102. large_sizes.push_back(s - (s / 8));
  103. }
  104. for (auto s : large_sizes) {
  105. b->Args({s, NumOtherKeys});
  106. // Once the sizes are more than 4x the `NumOtherKeys` minimum lookup buffer
  107. // size, also include 25% and 50% lookup buffer sizes which will
  108. // increasingly exhaust the ability to keep matching entries in the cache.
  109. if (s >= NumOtherKeys) {
  110. b->Args({s, s / 4});
  111. b->Args({s, s / 2});
  112. }
  113. }
  114. }
// Provide some Dense{Map,Set}Info viable implementations for the key types
// using Carbon's hashing framework. These let us benchmark the data structure
// alone rather than the combination of data structure and hashing routine.
//
// We only provide these for benchmarking -- they are *not* necessarily suitable
// for broader use. The Carbon hashing infrastructure has only been evaluated in
// the context of its specific hashtable design.
//
// Only the declaration is provided here; explicit specializations for the
// supported key types follow below.
template <typename T>
struct CarbonHashDI;
  124. template <>
  125. struct CarbonHashDI<int> {
  126. static auto getEmptyKey() -> int { return -1; }
  127. static auto getTombstoneKey() -> int { return -2; }
  128. static auto getHashValue(const int val) -> unsigned {
  129. return static_cast<uint64_t>(HashValue(val));
  130. }
  131. static auto isEqual(const int lhs, const int rhs) -> bool {
  132. return lhs == rhs;
  133. }
  134. };
  135. template <typename T>
  136. struct CarbonHashDI<T*> {
  137. static constexpr uintptr_t Log2MaxAlign = 12;
  138. static auto getEmptyKey() -> T* {
  139. auto val = static_cast<uintptr_t>(-1);
  140. val <<= Log2MaxAlign;
  141. // NOLINTNEXTLINE(performance-no-int-to-ptr): This is required by the API.
  142. return reinterpret_cast<int*>(val);
  143. }
  144. static auto getTombstoneKey() -> T* {
  145. auto val = static_cast<uintptr_t>(-2);
  146. val <<= Log2MaxAlign;
  147. // NOLINTNEXTLINE(performance-no-int-to-ptr): This is required by the API.
  148. return reinterpret_cast<int*>(val);
  149. }
  150. static auto getHashValue(const T* ptr_val) -> unsigned {
  151. return static_cast<uint64_t>(HashValue(ptr_val));
  152. }
  153. static auto isEqual(const T* lhs, const T* rhs) -> bool { return lhs == rhs; }
  154. };
  155. template <>
  156. struct CarbonHashDI<llvm::StringRef> {
  157. static auto getEmptyKey() -> llvm::StringRef {
  158. return llvm::StringRef(
  159. // NOLINTNEXTLINE(performance-no-int-to-ptr): Required by the API.
  160. reinterpret_cast<const char*>(~static_cast<uintptr_t>(0)), 0);
  161. }
  162. static auto getTombstoneKey() -> llvm::StringRef {
  163. return llvm::StringRef(
  164. // NOLINTNEXTLINE(performance-no-int-to-ptr): Required by the API.
  165. reinterpret_cast<const char*>(~static_cast<uintptr_t>(1)), 0);
  166. }
  167. static auto getHashValue(llvm::StringRef val) -> unsigned {
  168. return static_cast<uint64_t>(HashValue(val));
  169. }
  170. static auto isEqual(llvm::StringRef lhs, llvm::StringRef rhs) -> bool {
  171. if (rhs.data() == getEmptyKey().data()) {
  172. return lhs.data() == getEmptyKey().data();
  173. }
  174. if (rhs.data() == getTombstoneKey().data()) {
  175. return lhs.data() == getTombstoneKey().data();
  176. }
  177. return lhs == rhs;
  178. }
  179. };
  180. template <typename TableT>
  181. auto ReportTableMetrics(const TableT& table, benchmark::State& state) -> void {
  182. // While this count is "iteration invariant" (it should be exactly the same
  183. // for every iteration as the set of keys is the same), we don't use that
  184. // because it will scale this by the number of iterations. We want to
  185. // display the metrics for this benchmark *parameter*, not what resulted
  186. // from the number of iterations. That means we use the normal counter API
  187. // without flags.
  188. auto metrics = table.ComputeMetrics();
  189. state.counters["P-compares"] = metrics.probe_avg_compares;
  190. state.counters["P-distance"] = metrics.probe_avg_distance;
  191. state.counters["P-fraction"] =
  192. static_cast<double>(metrics.probed_key_count) / metrics.key_count;
  193. state.counters["Pmax-distance"] = metrics.probe_max_distance;
  194. state.counters["Pmax-compares"] = metrics.probe_max_compares;
  195. state.counters["Probed"] = metrics.probed_key_count;
  196. state.counters["Storage"] = metrics.storage_bytes;
  197. // Also compute how 'efficient' the storage is, 1.0 being zero bytes outside
  198. // of key and value.
  199. ssize_t element_size;
  200. if constexpr (requires { TableT::ValueT; }) {
  201. element_size =
  202. sizeof(typename TableT::KeyT) + sizeof(typename TableT::ValueT);
  203. } else {
  204. element_size = sizeof(typename TableT::KeyT);
  205. }
  206. state.counters["Storage eff"] =
  207. static_cast<double>(metrics.key_count * element_size) /
  208. metrics.storage_bytes;
  209. }
  210. } // namespace Carbon::RawHashtable
  211. #endif // CARBON_COMMON_RAW_HASHTABLE_BENCHMARK_HELPERS_H_