// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "common/raw_hashtable_benchmark_helpers.h"

#include <array>
#include <cstddef>
#include <cstring>
#include <forward_list>
#include <map>
#include <utility>
#include <vector>
  10. namespace Carbon::RawHashtable {
  11. // A local shuffle implementation built on Abseil to improve performance in
  12. // debug builds.
  13. template <typename T>
  14. static auto Shuffle(llvm::MutableArrayRef<T> data, absl::BitGen& gen) {
  15. for (ssize_t i : llvm::seq<ssize_t>(0, data.size() - 1)) {
  16. ssize_t j = absl::Uniform<ssize_t>(gen, 0, data.size() - i);
  17. if (j != 0) {
  18. std::swap(data[i], data[i + j]);
  19. }
  20. }
  21. }
// Number of distinct characters used to build benchmark string keys. Must be
// a power of two so positions can be derived with masks and shifts below.
constexpr ssize_t NumChars = 64;
static_assert(llvm::isPowerOf2_64(NumChars));
  24. // For benchmarking, we use short strings in a fixed distribution with common
  25. // characters. Real-world strings aren't uniform across ASCII or Unicode, etc.
  26. // And for *micro*-benchmarking we want to focus on the map overhead with short,
  27. // fast keys.
  28. static auto MakeChars() -> llvm::OwningArrayRef<char> {
  29. llvm::OwningArrayRef<char> characters(NumChars);
  30. // Start with `-` and `_`, and then add `a` - `z`, `A` - `Z`, and `0` - `9`.
  31. characters[0] = '-';
  32. characters[1] = '_';
  33. ssize_t i = 2;
  34. for (auto range :
  35. {llvm::seq_inclusive('a', 'z'), llvm::seq_inclusive('A', 'Z'),
  36. llvm::seq_inclusive('0', '9')}) {
  37. for (char c : range) {
  38. characters[i] = c;
  39. ++i;
  40. }
  41. }
  42. CARBON_CHECK(i == NumChars,
  43. "Expected exactly {0} characters, got {1} instead!", NumChars,
  44. i);
  45. return characters;
  46. }
// Total number of distinct 4-character strings over the `NumChars` character
// set. Must be a power of two so indices into the set can wrap with a mask.
constexpr ssize_t NumFourCharStrs = NumChars * NumChars * NumChars * NumChars;
static_assert(llvm::isPowerOf2_64(NumFourCharStrs));
  49. // Compute every 4-character string in a shuffled array. This is a little memory
  50. // intense -- 64 MiB -- but ends up being much cheaper by letting us reliably
  51. // select a unique 4-character sequence to avoid collisions.
  52. static auto MakeFourCharStrs(llvm::ArrayRef<char> characters, absl::BitGen& gen)
  53. -> llvm::OwningArrayRef<std::array<char, 4>> {
  54. constexpr ssize_t NumCharsMask = NumChars - 1;
  55. constexpr ssize_t NumCharsShift = llvm::ConstantLog2<NumChars>();
  56. llvm::OwningArrayRef<std::array<char, 4>> four_char_strs(NumFourCharStrs);
  57. for (auto [i, str] : llvm::enumerate(four_char_strs)) {
  58. str[0] = characters[i & NumCharsMask];
  59. i >>= NumCharsShift;
  60. str[1] = characters[i & NumCharsMask];
  61. i >>= NumCharsShift;
  62. str[2] = characters[i & NumCharsMask];
  63. i >>= NumCharsShift;
  64. CARBON_CHECK((i & ~NumCharsMask) == 0);
  65. str[3] = characters[i];
  66. }
  67. Shuffle(four_char_strs, gen);
  68. return four_char_strs;
  69. }
  70. constexpr ssize_t NumRandomChars = static_cast<ssize_t>(64) * 1024;
  71. // Create a pool of random characters to sample from rather than computing this
  72. // for every string which is very slow in debug builds. We also pad this pool
  73. // with the max length so we can pull the full length from the end to simplify
  74. // the logic when wrapping around the pool.
  75. static auto MakeRandomChars(llvm::ArrayRef<char> characters, int max_length,
  76. absl::BitGen& gen) -> llvm::OwningArrayRef<char> {
  77. llvm::OwningArrayRef<char> random_chars(NumRandomChars + max_length);
  78. for (char& c : random_chars) {
  79. c = characters[absl::Uniform<ssize_t>(gen, 0, NumChars)];
  80. }
  81. return random_chars;
  82. }
// Make a small vector of pointers into a single allocation of raw strings. The
// allocated memory is expected to leak and must be transitively referenced by a
// global. Each string has `length` size (which must be >= 4), and there are
// `key_count` keys in the result. Each key is filled from the `random_chars`
// until the last 4 characters. The last four characters of each string will be
// taken sequentially from `four_char_strs` from some random start position to
// ensure no duplicate keys are produced.
//
// NOTE(review): uniqueness of the four-character suffixes relies on
// `key_count <= NumFourCharStrs` so the sequential walk never repeats an
// entry -- confirm callers keep within that bound.
static auto MakeRawStrKeys(ssize_t length, ssize_t key_count,
                           llvm::ArrayRef<std::array<char, 4>> four_char_strs,
                           llvm::ArrayRef<char> random_chars, absl::BitGen& gen)
    -> llvm::SmallVector<const char*> {
  llvm::SmallVector<const char*> raw_keys;
  CARBON_CHECK(length >= 4);
  ssize_t prefix_length = length - 4;
  // Select a random start for indexing our four character strings.
  ssize_t four_char_index = absl::Uniform<ssize_t>(gen, 0, NumFourCharStrs);
  // Select a random start for the prefix random characters.
  ssize_t random_chars_index = absl::Uniform<ssize_t>(gen, 0, NumRandomChars);
  // Do a single memory allocation for all the keys of this length to
  // avoid an excessive number of small and fragmented allocations. This
  // memory is intentionally leaked as the keys are global and will
  // themselves point into it.
  char* key_text = new char[key_count * length];
  // Reserve all the key space since we know how many we'll need.
  raw_keys.reserve(key_count);
  for ([[gnu::unused]] ssize_t i : llvm::seq<ssize_t>(0, key_count)) {
    // Copy the random-character prefix. The pool is padded by the maximum
    // length, so reading `prefix_length` bytes starting near the end of the
    // pool is safe; the index itself wraps with a mask below.
    memcpy(key_text, random_chars.data() + random_chars_index, prefix_length);
    random_chars_index += prefix_length;
    random_chars_index &= NumRandomChars - 1;
    // Set the last four characters with this entry in the shuffled
    // sequence.
    memcpy(key_text + prefix_length, four_char_strs[four_char_index].data(), 4);
    // Step through the shuffled sequence. We start at a random position,
    // so we need to wrap around the end.
    ++four_char_index;
    four_char_index &= NumFourCharStrs - 1;
    // And finally save the start pointer as one of our raw keys.
    raw_keys.push_back(key_text);
    key_text += length;
  }
  return raw_keys;
}
// Build up a large collection of random and unique string keys. This is
// actually a relatively expensive operation due to needing to build all the
// random string text. As a consequence, the initializer of this global is
// somewhat performance tuned to ensure benchmarks don't take an excessive
// amount of time to run or use an excessive amount of memory.
static absl::NoDestructor<llvm::OwningArrayRef<llvm::StringRef>> raw_str_keys{
    [] {
      llvm::OwningArrayRef<llvm::StringRef> keys(MaxNumKeys);
      absl::BitGen gen;
      // The distribution of key lengths, weighted toward short keys. Must
      // stay sorted and evenly divide `MaxNumKeys`.
      std::array length_buckets = {
          4, 4, 4, 4, 5, 5, 5, 5, 7, 7, 10, 10, 15, 25, 40, 80,
      };
      static_assert((MaxNumKeys % length_buckets.size()) == 0);
      CARBON_CHECK(llvm::is_sorted(length_buckets));
      // For each distinct length bucket, we build a vector of raw keys. A
      // `forward_list` keeps each vector's address stable as more are added,
      // since we store pointers to them below.
      std::forward_list<llvm::SmallVector<const char*>> raw_keys_storage;
      // And a parallel array to the length buckets with the raw keys of that
      // length.
      std::array<llvm::SmallVector<const char*>*, length_buckets.size()>
          raw_keys_buckets;
      llvm::OwningArrayRef<char> characters = MakeChars();
      llvm::OwningArrayRef<std::array<char, 4>> four_char_strs =
          MakeFourCharStrs(characters, gen);
      llvm::OwningArrayRef<char> random_chars = MakeRandomChars(
          characters, /*max_length=*/length_buckets.back(), gen);
      ssize_t prev_length = -1;
      for (auto [length_index, length] : llvm::enumerate(length_buckets)) {
        // We can detect repetitions in length as they are sorted. A repeated
        // length shares the raw keys built for its first occurrence.
        if (length == prev_length) {
          raw_keys_buckets[length_index] = raw_keys_buckets[length_index - 1];
          continue;
        }
        prev_length = length;
        // We want to compute all the keys of this length that we'll need.
        ssize_t key_count = (MaxNumKeys / length_buckets.size()) *
                            llvm::count(length_buckets, length);
        raw_keys_buckets[length_index] =
            &raw_keys_storage.emplace_front(MakeRawStrKeys(
                length, key_count, four_char_strs, random_chars, gen));
      }
      // Now build the actual key array from our intermediate storage by
      // round-robin extracting from the length buckets.
      for (auto [index, key] : llvm::enumerate(keys)) {
        ssize_t bucket = index % length_buckets.size();
        ssize_t length = length_buckets[bucket];
        // We pop a raw key from the list of them associated with this bucket.
        const char* raw_key = raw_keys_buckets[bucket]->pop_back_val();
        // And build our key from that.
        key = llvm::StringRef(raw_key, length);
      }
      // Check that in fact we popped every raw key into our main keys.
      for (const auto& raw_keys : raw_keys_storage) {
        CARBON_CHECK(raw_keys.empty());
      }
      return keys;
    }()};
  181. static absl::NoDestructor<llvm::OwningArrayRef<int*>> raw_ptr_keys{[] {
  182. llvm::OwningArrayRef<int*> keys(MaxNumKeys);
  183. for (auto [index, key] : llvm::enumerate(keys)) {
  184. // We leak these pointers -- this is a static initializer executed once.
  185. key = new int(static_cast<int>(index));
  186. }
  187. return keys;
  188. }()};
  189. static absl::NoDestructor<llvm::OwningArrayRef<int>> raw_int_keys{[] {
  190. llvm::OwningArrayRef<int> keys(MaxNumKeys);
  191. for (auto [index, key] : llvm::enumerate(keys)) {
  192. key = index + 1;
  193. }
  194. return keys;
  195. }()};
  196. template <int LowZeroBits>
  197. static absl::NoDestructor<llvm::OwningArrayRef<LowZeroBitInt<LowZeroBits>>>
  198. raw_low_zero_bit_int_keys{[] {
  199. llvm::OwningArrayRef<LowZeroBitInt<LowZeroBits>> keys(MaxNumKeys);
  200. for (auto [index, key] : llvm::enumerate(keys)) {
  201. key = LowZeroBitInt<LowZeroBits>(index + 1);
  202. }
  203. return keys;
  204. }()};
  205. namespace {
// Allow generically dispatching over the specific key types we build and
// support.
//
// Returns the full, unshuffled array of pre-built raw keys for `T`. The
// returned view references leaked global storage and stays valid for the
// lifetime of the program.
template <typename T>
auto GetRawKeys() -> llvm::ArrayRef<T> {
  if constexpr (std::is_same_v<T, llvm::StringRef>) {
    return *raw_str_keys;
  } else if constexpr (std::is_pointer_v<T>) {
    return *raw_ptr_keys;
  } else if constexpr (std::is_same_v<T, LowZeroBitInt<12>>) {
    return *raw_low_zero_bit_int_keys<12>;
  } else if constexpr (std::is_same_v<T, LowZeroBitInt<24>>) {
    return *raw_low_zero_bit_int_keys<24>;
  } else if constexpr (std::is_same_v<T, LowZeroBitInt<32>>) {
    return *raw_low_zero_bit_int_keys<32>;
  } else {
    // Any remaining type falls through to the plain integer keys.
    return *raw_int_keys;
  }
}
// Memoization cache for `GetShuffledLookupKeys` below, keyed by the
// (table size, lookup size) pair. One cache per key type `T`.
template <typename T>
static absl::NoDestructor<
    std::map<std::pair<ssize_t, ssize_t>, llvm::OwningArrayRef<T>>>
    lookup_keys_storage;
// Given a particular table keys size and lookup keys size, provide an array ref
// to a shuffled set of lookup keys.
//
// Because different table sizes pull from different sub-ranges of our raw keys,
// we need to compute a distinct set of random keys in the table to use for
// lookups depending on the table size. And we also want to have an even
// distribution of key *sizes* throughout the lookup keys, and so we can't
// compute a single lookup keys array of the maximum size. Instead we need to
// compute a distinct special set of lookup keys for each pair of table and
// lookup size, and then shuffle that specific set into a random sequence that
// is returned. This function memoizes this sequence for each pair of sizes.
//
// NOTE(review): the memoization map is mutated without synchronization, so
// this appears to assume single-threaded benchmark setup -- confirm before
// calling concurrently.
template <typename T>
auto GetShuffledLookupKeys(ssize_t table_keys_size, ssize_t lookup_keys_size)
    -> llvm::ArrayRef<T> {
  // The raw keys aren't shuffled and round-robin through the sizes. We want to
  // keep the total size of lookup keys used exactly the same across runs. So
  // for a given size we always take the leading sequence from the raw keys for
  // that size, duplicating as needed to get the desired lookup sequence size,
  // and then shuffle the keys in that sequence to end up with a random sequence
  // of keys. We store each of these shuffled sequences in a map to avoid
  // repeatedly computing them.
  llvm::OwningArrayRef<T>& lookup_keys =
      (*lookup_keys_storage<T>)[{table_keys_size, lookup_keys_size}];
  // An empty entry marks a size pair that hasn't been computed yet.
  if (lookup_keys.empty()) {
    lookup_keys = llvm::OwningArrayRef<T>(lookup_keys_size);
    auto raw_keys = GetRawKeys<T>();
    // Round-robin through the leading `table_keys_size` raw keys, duplicating
    // as needed to fill the requested lookup count.
    for (auto [index, key] : llvm::enumerate(lookup_keys)) {
      key = raw_keys[index % table_keys_size];
    }
    absl::BitGen gen;
    Shuffle(lookup_keys, gen);
  }
  CARBON_CHECK(static_cast<ssize_t>(lookup_keys.size()) == lookup_keys_size);
  return lookup_keys;
}
  263. } // namespace
// Returns a pair of (table keys, miss keys): the leading `table_keys_size`
// raw keys for populating a table, and a shuffled set of `NumOtherKeys` keys
// taken from the tail of the raw key sequence for benchmarking missed
// lookups. The miss keys are computed once per key type `T` and cached for
// the program lifetime.
template <typename T>
auto GetKeysAndMissKeys(ssize_t table_keys_size)
    -> std::pair<llvm::ArrayRef<T>, llvm::ArrayRef<T>> {
  CARBON_CHECK(table_keys_size <= MaxNumKeys);
  // The raw keys aren't shuffled and round-robin through the sizes. Take the
  // tail of this sequence and shuffle it to form a random set of miss keys with
  // a consistent total size.
  //
  // NOTE(review): these keys only truly "miss" when `table_keys_size` is at
  // most `MaxNumKeys - NumOtherKeys`; otherwise the tail overlaps the table
  // keys. Confirm against callers.
  static absl::NoDestructor<llvm::OwningArrayRef<T>> miss_keys{[] {
    llvm::OwningArrayRef<T> keys;
    keys = GetRawKeys<T>().take_back(NumOtherKeys);
    CARBON_CHECK(keys.size() == NumOtherKeys);
    absl::BitGen gen;
    Shuffle(keys, gen);
    return keys;
  }()};
  return {GetRawKeys<T>().slice(0, table_keys_size), *miss_keys};
}
// Explicit instantiations of `GetKeysAndMissKeys` for each benchmarked key
// type.
template auto GetKeysAndMissKeys<int>(ssize_t size)
    -> std::pair<llvm::ArrayRef<int>, llvm::ArrayRef<int>>;
template auto GetKeysAndMissKeys<int*>(ssize_t size)
    -> std::pair<llvm::ArrayRef<int*>, llvm::ArrayRef<int*>>;
template auto GetKeysAndMissKeys<llvm::StringRef>(ssize_t size)
    -> std::pair<llvm::ArrayRef<llvm::StringRef>,
                 llvm::ArrayRef<llvm::StringRef>>;
template auto GetKeysAndMissKeys<LowZeroBitInt<12>>(ssize_t size)
    -> std::pair<llvm::ArrayRef<LowZeroBitInt<12>>,
                 llvm::ArrayRef<LowZeroBitInt<12>>>;
template auto GetKeysAndMissKeys<LowZeroBitInt<24>>(ssize_t size)
    -> std::pair<llvm::ArrayRef<LowZeroBitInt<24>>,
                 llvm::ArrayRef<LowZeroBitInt<24>>>;
template auto GetKeysAndMissKeys<LowZeroBitInt<32>>(ssize_t size)
    -> std::pair<llvm::ArrayRef<LowZeroBitInt<32>>,
                 llvm::ArrayRef<LowZeroBitInt<32>>>;
  297. template <typename T>
  298. auto GetKeysAndHitKeys(ssize_t table_keys_size, ssize_t lookup_keys_size)
  299. -> std::pair<llvm::ArrayRef<T>, llvm::ArrayRef<T>> {
  300. CARBON_CHECK(table_keys_size <= MaxNumKeys);
  301. CARBON_CHECK(lookup_keys_size <= MaxNumKeys);
  302. return {GetRawKeys<T>().slice(0, table_keys_size),
  303. GetShuffledLookupKeys<T>(table_keys_size, lookup_keys_size)};
  304. }
// Explicit instantiations of `GetKeysAndHitKeys` for each benchmarked key
// type.
template auto GetKeysAndHitKeys<int>(ssize_t size, ssize_t lookup_keys_size)
    -> std::pair<llvm::ArrayRef<int>, llvm::ArrayRef<int>>;
template auto GetKeysAndHitKeys<int*>(ssize_t size, ssize_t lookup_keys_size)
    -> std::pair<llvm::ArrayRef<int*>, llvm::ArrayRef<int*>>;
template auto GetKeysAndHitKeys<llvm::StringRef>(ssize_t size,
                                                 ssize_t lookup_keys_size)
    -> std::pair<llvm::ArrayRef<llvm::StringRef>,
                 llvm::ArrayRef<llvm::StringRef>>;
template auto GetKeysAndHitKeys<LowZeroBitInt<12>>(ssize_t size,
                                                   ssize_t lookup_keys_size)
    -> std::pair<llvm::ArrayRef<LowZeroBitInt<12>>,
                 llvm::ArrayRef<LowZeroBitInt<12>>>;
template auto GetKeysAndHitKeys<LowZeroBitInt<24>>(ssize_t size,
                                                   ssize_t lookup_keys_size)
    -> std::pair<llvm::ArrayRef<LowZeroBitInt<24>>,
                 llvm::ArrayRef<LowZeroBitInt<24>>>;
template auto GetKeysAndHitKeys<LowZeroBitInt<32>>(ssize_t size,
                                                   ssize_t lookup_keys_size)
    -> std::pair<llvm::ArrayRef<LowZeroBitInt<32>>,
                 llvm::ArrayRef<LowZeroBitInt<32>>>;
  325. template <typename T>
  326. auto DumpHashStatistics(llvm::ArrayRef<T> keys) -> void {
  327. if (keys.size() < GroupSize) {
  328. return;
  329. }
  330. // The hash table load factor is 7/8ths, so we want to add 1/7th of our
  331. // current size, subtract one, and pick the next power of two to get the power
  332. // of two where 7/8ths is greater than or equal to the incoming key size.
  333. ssize_t expected_size =
  334. llvm::NextPowerOf2(keys.size() + (keys.size() / 7) - 1);
  335. constexpr int GroupShift = llvm::ConstantLog2<GroupSize>();
  336. size_t mask = ComputeProbeMaskFromSize(expected_size);
  337. uint64_t salt = ComputeSeed();
  338. auto get_hash_index = [mask, salt](auto x) -> ssize_t {
  339. auto [hash_index, _] = HashValue(x, salt).template ExtractIndexAndTag<7>();
  340. return (hash_index & mask) >> GroupShift;
  341. };
  342. std::vector<std::vector<int>> grouped_key_indices(expected_size >>
  343. GroupShift);
  344. for (auto [i, k] : llvm::enumerate(keys)) {
  345. ssize_t hash_index = get_hash_index(k);
  346. CARBON_CHECK(hash_index < (expected_size >> GroupShift), "{0}", hash_index);
  347. grouped_key_indices[hash_index].push_back(i);
  348. }
  349. ssize_t max_group_index =
  350. llvm::max_element(grouped_key_indices,
  351. [](const auto& lhs, const auto& rhs) {
  352. return lhs.size() < rhs.size();
  353. }) -
  354. grouped_key_indices.begin();
  355. // If the max number of collisions on the index is less than or equal to the
  356. // group size, there shouldn't be any necessary probing (outside of deletion)
  357. // and so this isn't interesting, skip printing.
  358. if (grouped_key_indices[max_group_index].size() <= GroupSize) {
  359. return;
  360. }
  361. llvm::errs() << "keys: " << keys.size()
  362. << " groups: " << grouped_key_indices.size() << "\n"
  363. << "max group index: " << llvm::formatv("{0x8}", max_group_index)
  364. << " collisions: "
  365. << grouped_key_indices[max_group_index].size() << "\n";
  366. for (auto i : llvm::ArrayRef(grouped_key_indices[max_group_index])
  367. .take_front(2 * GroupSize)) {
  368. auto k = keys[i];
  369. auto hash = static_cast<uint64_t>(HashValue(k, salt));
  370. llvm::errs() << " key: " << k
  371. << " salt: " << llvm::formatv("{0:x16}", salt)
  372. << " hash: " << llvm::formatv("{0:x16}", hash) << "\n";
  373. }
  374. }
// Explicit instantiations of `DumpHashStatistics` for each benchmarked key
// type.
template auto DumpHashStatistics(llvm::ArrayRef<int> keys) -> void;
template auto DumpHashStatistics(llvm::ArrayRef<LowZeroBitInt<12>> keys)
    -> void;
template auto DumpHashStatistics(llvm::ArrayRef<LowZeroBitInt<24>> keys)
    -> void;
template auto DumpHashStatistics(llvm::ArrayRef<LowZeroBitInt<32>> keys)
    -> void;
template auto DumpHashStatistics(llvm::ArrayRef<int*> keys) -> void;
template auto DumpHashStatistics(llvm::ArrayRef<llvm::StringRef> keys) -> void;
  384. } // namespace Carbon::RawHashtable