// File: raw_hashtable_benchmark_helpers.h
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_COMMON_RAW_HASHTABLE_BENCHMARK_HELPERS_H_
  5. #define CARBON_COMMON_RAW_HASHTABLE_BENCHMARK_HELPERS_H_
  6. #include <benchmark/benchmark.h>
  7. #include <sys/types.h>
  8. #include <boost/unordered/unordered_flat_map.hpp>
  9. #include <compare>
  10. #include <limits>
  11. #include <map>
  12. #include <vector>
  13. #include "absl/base/no_destructor.h"
  14. #include "absl/random/random.h"
  15. #include "common/check.h"
  16. #include "common/hashing.h"
  17. #include "common/raw_hashtable.h"
  18. #include "llvm/ADT/ArrayRef.h"
  19. #include "llvm/ADT/Sequence.h"
  20. #include "llvm/ADT/SmallVector.h"
  21. #include "llvm/ADT/StringRef.h"
namespace Carbon::RawHashtable {

// We want to support benchmarking with 16M keys plus up to 256 "other" keys
// (for misses). The large number of keys helps check for performance hiccups
// with especially large tables and when missing all levels of cache.
inline constexpr ssize_t NumOtherKeys = 1 << 8;
inline constexpr ssize_t MaxNumKeys = (1 << 24) + NumOtherKeys;
// Get an array of main keys with the given `size`, which must be less than
// 2^24. Also get a miss keys array of `NumOtherKeys` which has no collisions
// with the main keys.
//
// For a given size, this will return the same arrays. This uses unsynchronized
// global state, and so is thread hostile and must not be called before main.
//
// Only declared here; the definition (and the supported key types `T`) live in
// the corresponding implementation file.
template <typename T>
auto GetKeysAndMissKeys(ssize_t table_keys_size)
    -> std::pair<llvm::ArrayRef<T>, llvm::ArrayRef<T>>;
// Get an array of main keys with the given `size`, which must be less than
// 2^24. Also get a hit keys array of `lookup_keys_size` all of which will occur
// in the main keys array. If the lookup size is larger than the main size, the
// lookup sequence will contain duplicates.
//
// For a given size, this will return the same arrays. This uses unsynchronized
// global state, and so is thread hostile and must not be called before main.
template <typename T>
auto GetKeysAndHitKeys(ssize_t table_keys_size, ssize_t lookup_keys_size)
    -> std::pair<llvm::ArrayRef<T>, llvm::ArrayRef<T>>;
// Dump statistics about hashing the given keys.
//
// Only declared here; defined in the corresponding implementation file.
template <typename T>
auto DumpHashStatistics(llvm::ArrayRef<T> keys) -> void;
// A type that works like an `int` but shifting in the specified number of low
// zero bits. This is only intended for testing hash tables with especially
// difficult to hash integer values, it isn't meant to be used otherwise.
template <int LowZeroBits>
struct LowZeroBitInt {
  // The stored representation: the original value shifted left so that the
  // low `LowZeroBits` bits are zero.
  int64_t shifted_value = 0;

  explicit constexpr LowZeroBitInt() = default;
  // NOTE(review): `value << LowZeroBits` can overflow `int64_t` for large
  // inputs (UB for signed types) — assumed fine for benchmark-sized values.
  explicit constexpr LowZeroBitInt(int64_t value)
      : shifted_value(value << LowZeroBits) {}

  // Stream the raw shifted representation, not the logical value.
  friend auto operator<<(llvm::raw_ostream& out, const LowZeroBitInt& value)
      -> llvm::raw_ostream& {
    return out << value.shifted_value;
  }

  constexpr auto operator==(const LowZeroBitInt& rhs) const -> bool = default;
  constexpr auto operator<=>(const LowZeroBitInt& rhs) const
      -> std::strong_ordering = default;

  // Hashing integration for Carbon's hashing framework.
  friend auto CarbonHashValue(const LowZeroBitInt& value, uint64_t seed)
      -> HashCode {
    return HashValue(value.shifted_value, seed);
  }

  // Hashing integration for Abseil containers.
  template <typename H>
  friend auto AbslHashValue(H h, const LowZeroBitInt& value) -> H {
    return H::combine(std::move(h), value.shifted_value);
  }

  // Hashing integration for Boost containers.
  friend auto hash_value(const LowZeroBitInt& value) -> size_t {
    boost::hash<int64_t> hasher;
    return hasher(value.shifted_value);
  }
};
  79. // Convert values used in hashtable benchmarking to a bool. This is used to form
  80. // dependencies between values stored in the hashtable between benchmark
  81. // iterations.
  82. template <typename T>
  83. auto ValueToBool(T value) -> bool {
  84. if constexpr (std::is_same_v<T, llvm::StringRef>) {
  85. return value.size() > 0;
  86. } else if constexpr (std::is_pointer_v<T>) {
  87. return value != nullptr;
  88. } else {
  89. // We want our keys to include `0` for integers, so use the largest value.
  90. return value != std::numeric_limits<T>::max();
  91. }
  92. }
  93. inline auto SizeArgs(benchmark::Benchmark* b) -> void {
  94. // Benchmarks for "miss" operations only have one parameter -- the size of the
  95. // table. These benchmarks use a fixed `NumOtherKeys` set of extra keys for
  96. // each miss operation.
  97. b->DenseRange(1, 4, 1);
  98. b->Arg(8);
  99. b->Arg(16);
  100. b->Arg(32);
  101. // For sizes >= 64 we first use the power of two which will have a low load
  102. // factor, and then target exactly at our max load factor.
  103. auto large_sizes = {64, 1 << 8, 1 << 12, 1 << 16, 1 << 20, 1 << 24};
  104. for (auto s : large_sizes) {
  105. b->Arg(s);
  106. }
  107. for (auto s : large_sizes) {
  108. b->Arg(s - (s / 8));
  109. }
  110. }
inline auto HitArgs(benchmark::Benchmark* b) -> void {
  // There are two parameters for benchmarks of "hit" operations. The first is
  // the size of the hashtable itself. The second is the size of a buffer of
  // random keys actually in the hashtable to use for the operations.
  //
  // For small sizes, we use a fixed `NumOtherKeys` lookup key count. This is
  // enough to avoid patterns of queries training the branch predictor just from
  // the keys themselves, while small enough to avoid significant L1 cache
  // pressure.
  b->ArgsProduct({benchmark::CreateDenseRange(1, 4, 1), {NumOtherKeys}});
  b->Args({8, NumOtherKeys});
  b->Args({16, NumOtherKeys});
  b->Args({32, NumOtherKeys});
  // For sizes >= 64 we first use the power of two which will have a low load
  // factor, and then target exactly at our max load factor. Start the sizes
  // list off with the powers of two, and then append a version of each power of
  // two adjusted down to the load factor. We'll then build the benchmarks from
  // these below.
  std::vector<ssize_t> large_sizes = {64, 1 << 8, 1 << 12,
                                      1 << 16, 1 << 20, 1 << 24};
  // The `seq` bound is captured before any `push_back` below runs, so only the
  // original six power-of-two entries are visited while appending.
  for (auto i : llvm::seq<int>(0, large_sizes.size())) {
    ssize_t s = large_sizes[i];
    large_sizes.push_back(s - (s / 8));
  }
  for (auto s : large_sizes) {
    b->Args({s, NumOtherKeys});
    // For sizes at or above `NumOtherKeys`, also include 25% and 50% lookup
    // buffer sizes which will increasingly exhaust the ability to keep matching
    // entries in the cache.
    //
    // NOTE(review): the original comment said "more than 4x the `NumOtherKeys`
    // minimum", but the guard below triggers at 1x (`s >= NumOtherKeys`), so
    // the 25%/50% buffers can be smaller than `NumOtherKeys` -- confirm whether
    // `s / 4 >= NumOtherKeys` was intended.
    if (s >= NumOtherKeys) {
      b->Args({s, s / 4});
      b->Args({s, s / 2});
    }
  }
}
// Provide some Dense{Map,Set}Info viable implementations for the key types
// using Carbon's hashing framework. These let us benchmark the data structure
// alone rather than the combination of data structure and hashing routine.
//
// We only provide these for benchmarking -- they are *not* necessarily suitable
// for broader use. The Carbon hashing infrastructure has only been evaluated in
// the context of its specific hashtable design.
//
// The primary template is only declared; the specializations below supply the
// per-key-type implementations.
template <typename T>
struct CarbonHashDI;
  155. template <>
  156. struct CarbonHashDI<int> {
  157. static auto getEmptyKey() -> int { return -1; }
  158. static auto getTombstoneKey() -> int { return -2; }
  159. static auto getHashValue(const int val) -> unsigned {
  160. return static_cast<uint64_t>(HashValue(val));
  161. }
  162. static auto isEqual(const int lhs, const int rhs) -> bool {
  163. return lhs == rhs;
  164. }
  165. };
  166. template <int LowZeroBits>
  167. struct CarbonHashDI<LowZeroBitInt<LowZeroBits>> {
  168. using IntT = LowZeroBitInt<LowZeroBits>;
  169. static auto getEmptyKey() -> IntT { return IntT(-1); }
  170. static auto getTombstoneKey() -> IntT { return IntT(-2); }
  171. static auto getHashValue(const IntT val) -> unsigned {
  172. return static_cast<uint64_t>(HashValue(val));
  173. }
  174. static auto isEqual(const IntT lhs, const IntT rhs) -> bool {
  175. return lhs == rhs;
  176. }
  177. };
  178. template <typename T>
  179. struct CarbonHashDI<T*> {
  180. static constexpr uintptr_t Log2MaxAlign = 12;
  181. static auto getEmptyKey() -> T* {
  182. auto val = static_cast<uintptr_t>(-1);
  183. val <<= Log2MaxAlign;
  184. // NOLINTNEXTLINE(performance-no-int-to-ptr): This is required by the API.
  185. return reinterpret_cast<int*>(val);
  186. }
  187. static auto getTombstoneKey() -> T* {
  188. auto val = static_cast<uintptr_t>(-2);
  189. val <<= Log2MaxAlign;
  190. // NOLINTNEXTLINE(performance-no-int-to-ptr): This is required by the API.
  191. return reinterpret_cast<int*>(val);
  192. }
  193. static auto getHashValue(const T* ptr_val) -> unsigned {
  194. return static_cast<uint64_t>(HashValue(ptr_val));
  195. }
  196. static auto isEqual(const T* lhs, const T* rhs) -> bool { return lhs == rhs; }
  197. };
  198. template <>
  199. struct CarbonHashDI<llvm::StringRef> {
  200. static auto getEmptyKey() -> llvm::StringRef {
  201. return llvm::StringRef(
  202. // NOLINTNEXTLINE(performance-no-int-to-ptr): Required by the API.
  203. reinterpret_cast<const char*>(~static_cast<uintptr_t>(0)), 0);
  204. }
  205. static auto getTombstoneKey() -> llvm::StringRef {
  206. return llvm::StringRef(
  207. // NOLINTNEXTLINE(performance-no-int-to-ptr): Required by the API.
  208. reinterpret_cast<const char*>(~static_cast<uintptr_t>(1)), 0);
  209. }
  210. static auto getHashValue(llvm::StringRef val) -> unsigned {
  211. return static_cast<uint64_t>(HashValue(val));
  212. }
  213. static auto isEqual(llvm::StringRef lhs, llvm::StringRef rhs) -> bool {
  214. if (rhs.data() == getEmptyKey().data()) {
  215. return lhs.data() == getEmptyKey().data();
  216. }
  217. if (rhs.data() == getTombstoneKey().data()) {
  218. return lhs.data() == getTombstoneKey().data();
  219. }
  220. return lhs == rhs;
  221. }
  222. };
  223. template <typename TableT>
  224. auto ReportTableMetrics(const TableT& table, benchmark::State& state) -> void {
  225. // While this count is "iteration invariant" (it should be exactly the same
  226. // for every iteration as the set of keys is the same), we don't use that
  227. // because it will scale this by the number of iterations. We want to
  228. // display the metrics for this benchmark *parameter*, not what resulted
  229. // from the number of iterations. That means we use the normal counter API
  230. // without flags.
  231. auto metrics = table.ComputeMetrics();
  232. state.counters["P-compares"] = metrics.probe_avg_compares;
  233. state.counters["P-distance"] = metrics.probe_avg_distance;
  234. state.counters["P-fraction"] =
  235. static_cast<double>(metrics.probed_key_count) / metrics.key_count;
  236. state.counters["Pmax-distance"] = metrics.probe_max_distance;
  237. state.counters["Pmax-compares"] = metrics.probe_max_compares;
  238. state.counters["Probed"] = metrics.probed_key_count;
  239. state.counters["Storage"] = metrics.storage_bytes;
  240. // Also compute how 'efficient' the storage is, 1.0 being zero bytes outside
  241. // of key and value.
  242. ssize_t element_size;
  243. if constexpr (requires { TableT::ValueT; }) {
  244. element_size =
  245. sizeof(typename TableT::KeyT) + sizeof(typename TableT::ValueT);
  246. } else {
  247. element_size = sizeof(typename TableT::KeyT);
  248. }
  249. state.counters["Storage eff"] =
  250. static_cast<double>(metrics.key_count * element_size) /
  251. metrics.storage_bytes;
  252. }
  253. } // namespace Carbon::RawHashtable
  254. namespace llvm {
  255. // Enable LLVM to hash our special stress testing integer type.
  256. template <int LowZeroBits>
  257. struct DenseMapInfo<Carbon::RawHashtable::LowZeroBitInt<LowZeroBits>> {
  258. using IntT = Carbon::RawHashtable::LowZeroBitInt<LowZeroBits>;
  259. static auto getEmptyKey() -> IntT { return IntT(-1); }
  260. static auto getTombstoneKey() -> IntT { return IntT(-2); }
  261. static auto getHashValue(const IntT val) -> unsigned {
  262. return DenseMapInfo<int64_t>::getHashValue(val.shifted_value);
  263. }
  264. static auto isEqual(const IntT lhs, const IntT rhs) -> bool {
  265. return lhs == rhs;
  266. }
  267. };
  268. } // namespace llvm
  269. #endif // CARBON_COMMON_RAW_HASHTABLE_BENCHMARK_HELPERS_H_