map_benchmark.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include <benchmark/benchmark.h>
  5. #include <boost/unordered/unordered_flat_map.hpp>
  6. #include <type_traits>
  7. #include "absl/container/flat_hash_map.h"
  8. #include "common/map.h"
  9. #include "common/raw_hashtable_benchmark_helpers.h"
  10. #include "llvm/ADT/DenseMap.h"
  11. namespace Carbon {
  12. namespace {
  13. using RawHashtable::CarbonHashDI;
  14. using RawHashtable::GetKeysAndHitKeys;
  15. using RawHashtable::GetKeysAndMissKeys;
  16. using RawHashtable::HitArgs;
  17. using RawHashtable::ReportTableMetrics;
  18. using RawHashtable::SizeArgs;
  19. using RawHashtable::ValueToBool;
  20. // Helpers to synthesize some value of one of the three types we use as value
  21. // types.
  22. template <typename T>
  23. auto MakeValue() -> T {
  24. if constexpr (std::is_same_v<T, llvm::StringRef>) {
  25. return "abc";
  26. } else if constexpr (std::is_pointer_v<T>) {
  27. static std::remove_pointer_t<T> x;
  28. return &x;
  29. } else {
  30. return 42;
  31. }
  32. }
  33. template <typename T>
  34. auto MakeValue2() -> T {
  35. if constexpr (std::is_same_v<T, llvm::StringRef>) {
  36. return "qux";
  37. } else if constexpr (std::is_pointer_v<T>) {
  38. static std::remove_pointer_t<T> y;
  39. return &y;
  40. } else {
  41. return 7;
  42. }
  43. }
  44. template <typename MapT>
  45. struct IsCarbonMapImpl : std::false_type {};
  46. template <typename KT, typename VT, int MinSmallSize>
  47. struct IsCarbonMapImpl<Map<KT, VT, MinSmallSize>> : std::true_type {};
  48. template <typename MapT>
  49. static constexpr bool IsCarbonMap = IsCarbonMapImpl<MapT>::value;
  50. // A wrapper around various map types that we specialize to implement a common
  51. // API used in the benchmarks for various different map data structures that
  52. // support different APIs. The primary template assumes a roughly
  53. // `std::unordered_map` API design, and types with a different API design are
  54. // supported through specializations.
  55. template <typename MapT>
  56. struct MapWrapperImpl {
  57. using KeyT = typename MapT::key_type;
  58. using ValueT = typename MapT::mapped_type;
  59. MapT m;
  60. auto BenchContains(KeyT k) -> bool { return m.find(k) != m.end(); }
  61. auto BenchLookup(KeyT k) -> bool {
  62. auto it = m.find(k);
  63. if (it == m.end()) {
  64. return false;
  65. }
  66. return ValueToBool(it->second);
  67. }
  68. auto BenchInsert(KeyT k, ValueT v) -> bool {
  69. auto result = m.insert({k, v});
  70. return result.second;
  71. }
  72. auto BenchUpdate(KeyT k, ValueT v) -> bool {
  73. auto result = m.insert({k, v});
  74. result.first->second = v;
  75. return result.second;
  76. }
  77. auto BenchErase(KeyT k) -> bool { return m.erase(k) != 0; }
  78. };
  79. // Explicit (partial) specialization for the Carbon map type that uses its
  80. // different API design.
  81. template <typename KT, typename VT, int MinSmallSize>
  82. struct MapWrapperImpl<Map<KT, VT, MinSmallSize>> {
  83. using MapT = Map<KT, VT, MinSmallSize>;
  84. using KeyT = KT;
  85. using ValueT = VT;
  86. MapT m;
  87. auto BenchContains(KeyT k) -> bool { return m.Contains(k); }
  88. auto BenchLookup(KeyT k) -> bool {
  89. auto result = m.Lookup(k);
  90. if (!result) {
  91. return false;
  92. }
  93. return ValueToBool(result.value());
  94. }
  95. auto BenchInsert(KeyT k, ValueT v) -> bool {
  96. auto result = m.Insert(k, v);
  97. return result.is_inserted();
  98. }
  99. auto BenchUpdate(KeyT k, ValueT v) -> bool {
  100. auto result = m.Update(k, v);
  101. return result.is_inserted();
  102. }
  103. auto BenchErase(KeyT k) -> bool { return m.Erase(k); }
  104. };
  105. // Provide a way to override the Carbon Map specific benchmark runs with another
  106. // hashtable implementation. When building, you can use one of these enum names
  107. // in a macro define such as `-DCARBON_MAP_BENCH_OVERRIDE=Name` in order to
  108. // trigger a specific override for the `Map` type benchmarks. This is used to
  109. // get before/after runs that compare the performance of Carbon's Map versus
  110. // other implementations.
  111. enum class MapOverride : uint8_t {
  112. None,
  113. Abseil,
  114. Boost,
  115. LLVM,
  116. LLVMAndCarbonHash,
  117. };
  118. #ifndef CARBON_MAP_BENCH_OVERRIDE
  119. #define CARBON_MAP_BENCH_OVERRIDE None
  120. #endif
  121. template <typename MapT, MapOverride Override>
  122. struct MapWrapperOverride : MapWrapperImpl<MapT> {};
  123. template <typename KeyT, typename ValueT, int MinSmallSize>
  124. struct MapWrapperOverride<Map<KeyT, ValueT, MinSmallSize>, MapOverride::Abseil>
  125. : MapWrapperImpl<absl::flat_hash_map<KeyT, ValueT>> {};
  126. template <typename KeyT, typename ValueT, int MinSmallSize>
  127. struct MapWrapperOverride<Map<KeyT, ValueT, MinSmallSize>, MapOverride::Boost>
  128. : MapWrapperImpl<boost::unordered::unordered_flat_map<KeyT, ValueT>> {};
  129. template <typename KeyT, typename ValueT, int MinSmallSize>
  130. struct MapWrapperOverride<Map<KeyT, ValueT, MinSmallSize>, MapOverride::LLVM>
  131. : MapWrapperImpl<llvm::DenseMap<KeyT, ValueT>> {};
  132. template <typename KeyT, typename ValueT, int MinSmallSize>
  133. struct MapWrapperOverride<Map<KeyT, ValueT, MinSmallSize>,
  134. MapOverride::LLVMAndCarbonHash>
  135. : MapWrapperImpl<llvm::DenseMap<KeyT, ValueT, CarbonHashDI<KeyT>>> {};
  136. template <typename MapT>
  137. using MapWrapper =
  138. MapWrapperOverride<MapT, MapOverride::CARBON_MAP_BENCH_OVERRIDE>;
  139. template <typename MapT>
  140. auto ReportMetrics(const MapWrapper<MapT>& m_wrapper, benchmark::State& state)
  141. -> void {
  142. // Report some extra statistics about the Carbon type.
  143. if constexpr (IsCarbonMap<MapT>) {
  144. ReportTableMetrics(m_wrapper.m, state);
  145. }
  146. }
  147. // NOLINTBEGIN(bugprone-macro-parentheses): Parentheses are incorrect here.
  148. #define MAP_BENCHMARK_ONE_OP_SIZE(NAME, APPLY, KT, VT) \
  149. BENCHMARK(NAME<Map<KT, VT>>)->Apply(APPLY); \
  150. BENCHMARK(NAME<absl::flat_hash_map<KT, VT>>)->Apply(APPLY); \
  151. BENCHMARK(NAME<boost::unordered::unordered_flat_map<KT, VT>>)->Apply(APPLY); \
  152. BENCHMARK(NAME<llvm::DenseMap<KT, VT>>)->Apply(APPLY); \
  153. BENCHMARK(NAME<llvm::DenseMap<KT, VT, CarbonHashDI<KT>>>)->Apply(APPLY)
  154. // NOLINTEND(bugprone-macro-parentheses)
  155. #define MAP_BENCHMARK_ONE_OP(NAME, APPLY) \
  156. MAP_BENCHMARK_ONE_OP_SIZE(NAME, APPLY, int, int); \
  157. MAP_BENCHMARK_ONE_OP_SIZE(NAME, APPLY, int*, int*); \
  158. MAP_BENCHMARK_ONE_OP_SIZE(NAME, APPLY, int, llvm::StringRef); \
  159. MAP_BENCHMARK_ONE_OP_SIZE(NAME, APPLY, llvm::StringRef, int)
  160. // Benchmark the minimal latency of checking if a key is contained within a map,
  161. // when it *is* definitely in that map. Because this is only really measuring
  162. // the *minimal* latency, it is more similar to a throughput benchmark.
  163. //
  164. // While this is structured to observe the latency of testing for presence of a
  165. // key, it is important to understand the reality of what this measures. Because
  166. // the boolean result testing for whether a key is in a map is fundamentally
  167. // provided not by accessing some data, but by branching on data to a control
  168. // flow path which sets the boolean to `true` or `false`, the result can be
  169. // speculatively provided based on predicting the conditional branch without
  170. // waiting for the results of the comparison to become available. And because
  171. // this is a small operation and we arrange for all the candidate keys to be
  172. // present, that branch *should* be predicted extremely well. The result is that
  173. // this measures the un-speculated latency of testing for presence which should
  174. // be small or zero. Which is why this is ultimately more similar to a
  175. // throughput benchmark.
  176. //
  177. // Because of these measurement oddities, the specific measurements here may not
  178. // be very interesting for predicting real-world performance in any way, but
  179. // they are useful for comparing how 'cheap' the operation is across changes to
  180. // the data structure or between similar data structures with similar
  181. // properties.
  182. template <typename MapT>
  183. static void BM_MapContainsHit(benchmark::State& state) {
  184. using MapWrapperT = MapWrapper<MapT>;
  185. using KT = typename MapWrapperT::KeyT;
  186. using VT = typename MapWrapperT::ValueT;
  187. MapWrapperT m;
  188. auto [keys, lookup_keys] =
  189. GetKeysAndHitKeys<KT>(state.range(0), state.range(1));
  190. for (auto k : keys) {
  191. m.BenchInsert(k, MakeValue<VT>());
  192. }
  193. ssize_t lookup_keys_size = lookup_keys.size();
  194. while (state.KeepRunningBatch(lookup_keys_size)) {
  195. for (ssize_t i = 0; i < lookup_keys_size;) {
  196. // We block optimizing `i` as that has proven both more effective at
  197. // blocking the loop from being optimized away and avoiding disruption of
  198. // the generated code that we're benchmarking.
  199. benchmark::DoNotOptimize(i);
  200. bool result = m.BenchContains(lookup_keys[i]);
  201. CARBON_DCHECK(result);
  202. // We use the lookup success to step through keys, establishing a
  203. // dependency between each lookup. This doesn't fully allow us to measure
  204. // latency rather than throughput, as noted above.
  205. i += static_cast<ssize_t>(result);
  206. }
  207. }
  208. ReportMetrics(m, state);
  209. }
  210. MAP_BENCHMARK_ONE_OP(BM_MapContainsHit, HitArgs);
  211. // Similar to `BM_MapContainsHit`, while this is structured as a latency
  212. // benchmark, the critical path is expected to be well predicted and so it
  213. // should turn into something closer to a throughput benchmark.
  214. template <typename MapT>
  215. static void BM_MapContainsMiss(benchmark::State& state) {
  216. using MapWrapperT = MapWrapper<MapT>;
  217. using KT = typename MapWrapperT::KeyT;
  218. using VT = typename MapWrapperT::ValueT;
  219. MapWrapperT m;
  220. auto [keys, lookup_keys] = GetKeysAndMissKeys<KT>(state.range(0));
  221. for (auto k : keys) {
  222. m.BenchInsert(k, MakeValue<VT>());
  223. }
  224. ssize_t lookup_keys_size = lookup_keys.size();
  225. while (state.KeepRunningBatch(lookup_keys_size)) {
  226. for (ssize_t i = 0; i < lookup_keys_size;) {
  227. benchmark::DoNotOptimize(i);
  228. bool result = m.BenchContains(lookup_keys[i]);
  229. CARBON_DCHECK(!result);
  230. i += static_cast<ssize_t>(!result);
  231. }
  232. }
  233. ReportMetrics(m, state);
  234. }
  235. MAP_BENCHMARK_ONE_OP(BM_MapContainsMiss, SizeArgs);
  236. // This is a genuine latency benchmark. We lookup a key in the hashtable and use
  237. // the value associated with that key in the critical path of loading the next
  238. // iteration's key. We still ensure the keys are always present, and so we
  239. // generally expect the data structure branches to be well predicted. But we
  240. // vary the keys aggressively to avoid any prediction artifacts from repeatedly
  241. // examining the same key.
  242. //
  243. // This latency can be very helpful for understanding a range of data structure
  244. // behaviors:
  245. // - Many users of hashtables are directly dependent on the latency of this
  246. // operation, and this micro-benchmark will reflect the expected latency for
  247. // them.
  248. // - Showing how latency varies across different sizes of table and different
  249. // fractions of the table being accessed (and thus needing space in the
  250. // cache).
  251. //
  252. // However, it remains an ultimately synthetic and unrepresentative benchmark.
  253. // It should primarily be used to understand the relative cost of these
  254. // operations between versions of the data structure or between related data
  255. // structures.
  256. //
  257. // We vary both the number of entries in the table and the number of distinct
  258. // keys used when doing lookups. As the table becomes large, the latter dictates
  259. // the fraction of the table that will be accessed and thus the working set size
  260. // of the benchmark. Querying the same small number of keys in even a large
  261. // table doesn't actually encounter any cache pressure, so only a few of these
  262. // benchmarks will show any effects of the caching subsystem.
  263. template <typename MapT>
  264. static void BM_MapLookupHit(benchmark::State& state) {
  265. using MapWrapperT = MapWrapper<MapT>;
  266. using KT = typename MapWrapperT::KeyT;
  267. using VT = typename MapWrapperT::ValueT;
  268. MapWrapperT m;
  269. auto [keys, lookup_keys] =
  270. GetKeysAndHitKeys<KT>(state.range(0), state.range(1));
  271. for (auto k : keys) {
  272. m.BenchInsert(k, MakeValue<VT>());
  273. }
  274. ssize_t lookup_keys_size = lookup_keys.size();
  275. while (state.KeepRunningBatch(lookup_keys_size)) {
  276. for (ssize_t i = 0; i < lookup_keys_size;) {
  277. benchmark::DoNotOptimize(i);
  278. bool result = m.BenchLookup(lookup_keys[i]);
  279. CARBON_DCHECK(result);
  280. i += static_cast<ssize_t>(result);
  281. }
  282. }
  283. ReportMetrics(m, state);
  284. }
  285. MAP_BENCHMARK_ONE_OP(BM_MapLookupHit, HitArgs);
  286. // This is an update throughput benchmark in practice. While whether the key was
  287. // a hit is kept in the critical path, we only use keys that are hits and so
  288. // expect that to be fully predicted and speculated.
  289. //
  290. // However, we expect this fairly closely matches how user code interacts with
  291. // an update-style API. It will have some conditional testing (even if just an
  292. // assert) on whether the key was a hit and otherwise continue executing. As a
  293. // consequence the actual update is expected to not be in a meaningful critical
  294. // path.
  295. //
  296. // This still provides a basic way to measure the cost of this operation,
  297. // especially when comparing between implementations or across different hash
  298. // tables.
  299. template <typename MapT>
  300. static void BM_MapUpdateHit(benchmark::State& state) {
  301. using MapWrapperT = MapWrapper<MapT>;
  302. using KT = typename MapWrapperT::KeyT;
  303. using VT = typename MapWrapperT::ValueT;
  304. MapWrapperT m;
  305. auto [keys, lookup_keys] =
  306. GetKeysAndHitKeys<KT>(state.range(0), state.range(1));
  307. for (auto k : keys) {
  308. m.BenchInsert(k, MakeValue<VT>());
  309. }
  310. ssize_t lookup_keys_size = lookup_keys.size();
  311. while (state.KeepRunningBatch(lookup_keys_size)) {
  312. for (ssize_t i = 0; i < lookup_keys_size; ++i) {
  313. benchmark::DoNotOptimize(i);
  314. bool inserted = m.BenchUpdate(lookup_keys[i], MakeValue2<VT>());
  315. CARBON_DCHECK(!inserted);
  316. }
  317. }
  318. ReportMetrics(m, state);
  319. }
  320. MAP_BENCHMARK_ONE_OP(BM_MapUpdateHit, HitArgs);
  321. // First erase and then insert the key. The code path will always be the same
  322. // here and so we expect this to largely be a throughput benchmark because of
  323. // branch prediction and speculative execution.
  324. //
  325. // We don't expect erase followed by insertion to be a common user code
  326. // sequence, but we don't have a good way of benchmarking either erase or insert
  327. // in isolation -- each would change the size of the table and thus the next
  328. // iteration's benchmark. And if we try to correct the table size outside of the
  329. // timed region, we end up trying to exclude too fine grained of a region from
  330. // timers to get good measurement data.
  331. //
  332. // Our solution is to benchmark both erase and insertion back to back. We can
  333. // then get a good profile of the code sequence of each, and at least measure
  334. // the sum cost of these reliably. Careful profiling can help attribute that
  335. // cost between erase and insert in order to understand which of the two
  336. // operations is contributing most to any performance artifacts observed.
  337. template <typename MapT>
  338. static void BM_MapEraseUpdateHit(benchmark::State& state) {
  339. using MapWrapperT = MapWrapper<MapT>;
  340. using KT = typename MapWrapperT::KeyT;
  341. using VT = typename MapWrapperT::ValueT;
  342. MapWrapperT m;
  343. auto [keys, lookup_keys] =
  344. GetKeysAndHitKeys<KT>(state.range(0), state.range(1));
  345. for (auto k : keys) {
  346. m.BenchInsert(k, MakeValue<VT>());
  347. }
  348. ssize_t lookup_keys_size = lookup_keys.size();
  349. while (state.KeepRunningBatch(lookup_keys_size)) {
  350. for (ssize_t i = 0; i < lookup_keys_size; ++i) {
  351. benchmark::DoNotOptimize(i);
  352. m.BenchErase(lookup_keys[i]);
  353. benchmark::ClobberMemory();
  354. bool inserted = m.BenchUpdate(lookup_keys[i], MakeValue2<VT>());
  355. CARBON_DCHECK(inserted);
  356. }
  357. }
  358. }
  359. MAP_BENCHMARK_ONE_OP(BM_MapEraseUpdateHit, HitArgs);
  360. // NOLINTBEGIN(bugprone-macro-parentheses): Parentheses are incorrect here.
  361. #define MAP_BENCHMARK_OP_SEQ_SIZE(NAME, KT, VT) \
  362. BENCHMARK(NAME<Map<KT, VT>>)->Apply(SizeArgs); \
  363. BENCHMARK(NAME<absl::flat_hash_map<KT, VT>>)->Apply(SizeArgs); \
  364. BENCHMARK(NAME<boost::unordered::unordered_flat_map<KT, VT>>) \
  365. ->Apply(SizeArgs); \
  366. BENCHMARK(NAME<llvm::DenseMap<KT, VT>>)->Apply(APPLY); \
  367. BENCHMARK(NAME<llvm::DenseMap<KT, VT, CarbonHashDI<KT>>>)->Apply(SizeArgs)
  368. // NOLINTEND(bugprone-macro-parentheses)
  369. #define MAP_BENCHMARK_OP_SEQ(NAME) \
  370. MAP_BENCHMARK_OP_SEQ_SIZE(NAME, int, int); \
  371. MAP_BENCHMARK_OP_SEQ_SIZE(NAME, int*, int*); \
  372. MAP_BENCHMARK_OP_SEQ_SIZE(NAME, int, llvm::StringRef); \
  373. MAP_BENCHMARK_OP_SEQ_SIZE(NAME, llvm::StringRef, int)
  374. // This is an interesting, somewhat specialized benchmark that measures the cost
  375. // of inserting a sequence of key/value pairs into a table with no collisions up
  376. // to some size and then inserting a colliding key and throwing away the table.
  377. //
  378. // This can give an idea of the cost of building up a map of a particular size,
  379. // but without actually using it. Or of algorithms like cycle-detection which
  380. // for some reason need an associative container.
  381. //
  382. // It also covers both the insert-into-an-empty-slot code path that isn't
  383. // covered elsewhere, and the code path for growing a table to a larger size.
  384. //
  385. // Because this benchmark operates on whole maps, we also compute the number of
  386. // probed keys for Carbon's map as that is both a general reflection of the
  387. // efficacy of the underlying hash function, and a direct factor that drives the
  388. // cost of these operations.
  389. template <typename MapT>
  390. static void BM_MapInsertSeq(benchmark::State& state) {
  391. using MapWrapperT = MapWrapper<MapT>;
  392. using KT = typename MapWrapperT::KeyT;
  393. using VT = typename MapWrapperT::ValueT;
  394. constexpr ssize_t LookupKeysSize = 1 << 8;
  395. auto [keys, lookup_keys] =
  396. GetKeysAndHitKeys<KT>(state.range(0), LookupKeysSize);
  397. // Note that we don't force batches that use all the lookup keys because
  398. // there's no difference in cache usage by covering all the different lookup
  399. // keys.
  400. ssize_t i = 0;
  401. for (auto _ : state) {
  402. benchmark::DoNotOptimize(i);
  403. MapWrapperT m;
  404. for (auto k : keys) {
  405. bool inserted = m.BenchInsert(k, MakeValue<VT>());
  406. CARBON_DCHECK(inserted, "Must be a successful insert!");
  407. }
  408. // Now insert a final random repeated key.
  409. bool inserted = m.BenchInsert(lookup_keys[i], MakeValue2<VT>());
  410. CARBON_DCHECK(!inserted, "Must already be in the map!");
  411. // Rotate through the shuffled keys.
  412. i = (i + static_cast<ssize_t>(!inserted)) & (LookupKeysSize - 1);
  413. }
  414. // It can be easier in some cases to think of this as a key-throughput rate of
  415. // insertion rather than the latency of inserting N keys, so construct the
  416. // rate counter as well.
  417. state.counters["KeyRate"] = benchmark::Counter(
  418. keys.size(), benchmark::Counter::kIsIterationInvariantRate);
  419. // Report some extra statistics about the Carbon type.
  420. if constexpr (IsCarbonMap<MapT>) {
  421. // Re-build a map outside of the timing loop to look at the statistics
  422. // rather than the timing.
  423. MapWrapperT m;
  424. for (auto k : keys) {
  425. bool inserted = m.BenchInsert(k, MakeValue<VT>());
  426. CARBON_DCHECK(inserted, "Must be a successful insert!");
  427. }
  428. ReportMetrics(m, state);
  429. // Uncomment this call to print out statistics about the index-collisions
  430. // among these keys for debugging:
  431. //
  432. // RawHashtable::DumpHashStatistics(keys);
  433. }
  434. }
  435. MAP_BENCHMARK_ONE_OP(BM_MapInsertSeq, SizeArgs);
  436. } // namespace
  437. } // namespace Carbon