// Part of the Carbon Language project, under the Apache License v2.0 with LLVM // Exceptions. See /LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "common/raw_hashtable_benchmark_helpers.h" #include #include #include #include #include namespace Carbon::RawHashtable { // A local shuffle implementation built on Abseil to improve performance in // debug builds. template static auto Shuffle(llvm::MutableArrayRef data, absl::BitGen& gen) { for (ssize_t i : llvm::seq(0, data.size() - 1)) { ssize_t j = absl::Uniform(gen, 0, data.size() - i); if (j != 0) { std::swap(data[i], data[i + j]); } } } constexpr ssize_t NumChars = 64; static_assert(llvm::isPowerOf2_64(NumChars)); // For benchmarking, we use short strings in a fixed distribution with common // characters. Real-world strings aren't uniform across ASCII or Unicode, etc. // And for *micro*-benchmarking we want to focus on the map overhead with short, // fast keys. static auto MakeChars() -> llvm::SmallVector { llvm::SmallVector characters; characters.reserve(NumChars); // Start with `-` and `_`, and then add `a` - `z`, `A` - `Z`, and `0` - `9`. characters.push_back('-'); characters.push_back('_'); llvm::append_range(characters, llvm::seq_inclusive('a', 'z')); llvm::append_range(characters, llvm::seq_inclusive('A', 'Z')); llvm::append_range(characters, llvm::seq_inclusive('0', '9')); CARBON_CHECK(characters.size() == NumChars, "Expected exactly {0} characters, got {1} instead!", NumChars, characters.size()); return characters; } constexpr ssize_t NumFourCharStrs = NumChars * NumChars * NumChars * NumChars; static_assert(llvm::isPowerOf2_64(NumFourCharStrs)); // Compute every 4-character string in a shuffled array. This is a little memory // intense -- 64 MiB -- but ends up being much cheaper by letting us reliably // select a unique 4-character sequence to avoid collisions. static auto MakeFourCharStrs(llvm::ArrayRef characters, absl::BitGen& gen) -> llvm::SmallVector> { constexpr ssize_t NumCharsMask = NumChars - 1; constexpr ssize_t NumCharsShift = llvm::ConstantLog2(); llvm::SmallVector> four_char_strs( llvm::map_range(llvm::seq(NumFourCharStrs), [&](auto i) { std::array str; str[0] = characters[i & NumCharsMask]; i >>= NumCharsShift; str[1] = characters[i & NumCharsMask]; i >>= NumCharsShift; str[2] = characters[i & NumCharsMask]; i >>= NumCharsShift; CARBON_CHECK((i & ~NumCharsMask) == 0); str[3] = characters[i]; return str; })); Shuffle>(four_char_strs, gen); return four_char_strs; } constexpr ssize_t NumRandomChars = static_cast(64) * 1024; // Create a pool of random characters to sample from rather than computing this // for every string which is very slow in debug builds. We also pad this pool // with the max length so we can pull the full length from the end to simplify // the logic when wrapping around the pool. static auto MakeRandomChars(llvm::ArrayRef characters, int max_length, absl::BitGen& gen) -> llvm::SmallVector { return llvm::SmallVector( llvm::map_range(llvm::seq(NumRandomChars + max_length), [&](auto /*i*/) { return characters[absl::Uniform(gen, 0, NumChars)]; })); } // Make a small vector of pointers into a single allocation of raw strings. The // allocated memory is expected to leak and must be transitively referenced by a // global. Each string has `length` size (which must be >= 4), and there are // `key_count` keys in the result. Each key is filled from the `random_chars` // until the last 4 characters. The last four characters of each string will be // taken sequentially from `four_char_strs` from some random start position to // ensure no duplicate keys are produced. static auto MakeRawStrKeys(ssize_t length, ssize_t key_count, llvm::ArrayRef> four_char_strs, llvm::ArrayRef random_chars, absl::BitGen& gen) -> llvm::SmallVector { llvm::SmallVector raw_keys; CARBON_CHECK(length >= 4); ssize_t prefix_length = length - 4; // Select a random start for indexing our four character strings. ssize_t four_char_index = absl::Uniform(gen, 0, NumFourCharStrs); // Select a random start for the prefix random characters. ssize_t random_chars_index = absl::Uniform(gen, 0, NumRandomChars); // Do a single memory allocation for all the keys of this length to // avoid an excessive number of small and fragmented allocations. This // memory is intentionally leaked as the keys are global and will // themselves will point into it. char* key_text = new char[key_count * length]; // Reserve all the key space since we know how many we'll need. raw_keys.reserve(key_count); for ([[gnu::unused]] ssize_t i : llvm::seq(0, key_count)) { memcpy(key_text, random_chars.data() + random_chars_index, prefix_length); random_chars_index += prefix_length; random_chars_index &= NumRandomChars - 1; // Set the last four characters with this entry in the shuffled // sequence. memcpy(key_text + prefix_length, four_char_strs[four_char_index].data(), 4); // Step through the shuffled sequence. We start at a random position, // so we need to wrap around the end. ++four_char_index; four_char_index &= NumFourCharStrs - 1; // And finally save the start pointer as one of our raw keys. raw_keys.push_back(key_text); key_text += length; } return raw_keys; } // Build up a large collection of random and unique string keys. This is // actually a relatively expensive operation due to needing to build all the // random string text. As a consequence, the initializer of this global is // somewhat performance tuned to ensure benchmarks don't take an excessive // amount of time to run or use an excessive amount of memory. static absl::NoDestructor> raw_str_keys{[] { llvm::SmallVector keys(MaxNumKeys); absl::BitGen gen; std::array length_buckets = { 4, 4, 4, 4, 5, 5, 5, 5, 7, 7, 10, 10, 15, 25, 40, 80, }; static_assert((MaxNumKeys % length_buckets.size()) == 0); CARBON_CHECK(llvm::is_sorted(length_buckets)); // For each distinct length bucket, we build a vector of raw keys. std::forward_list> raw_keys_storage; // And a parallel array to the length buckets with the raw keys of that // length. std::array*, length_buckets.size()> raw_keys_buckets; llvm::SmallVector characters = MakeChars(); llvm::SmallVector> four_char_strs = MakeFourCharStrs(characters, gen); llvm::SmallVector random_chars = MakeRandomChars(characters, /*max_length=*/length_buckets.back(), gen); ssize_t prev_length = -1; for (auto [length_index, length] : llvm::enumerate(length_buckets)) { // We can detect repetitions in length as they are sorted. if (length == prev_length) { raw_keys_buckets[length_index] = raw_keys_buckets[length_index - 1]; continue; } prev_length = length; // We want to compute all the keys of this length that we'll need. ssize_t key_count = (MaxNumKeys / length_buckets.size()) * llvm::count(length_buckets, length); raw_keys_buckets[length_index] = &raw_keys_storage.emplace_front( MakeRawStrKeys(length, key_count, four_char_strs, random_chars, gen)); } // Now build the actual key array from our intermediate storage by // round-robin extracting from the length buckets. for (auto [index, key] : llvm::enumerate(keys)) { ssize_t bucket = index % length_buckets.size(); ssize_t length = length_buckets[bucket]; // We pop a raw key from the list of them associated with this bucket. const char* raw_key = raw_keys_buckets[bucket]->pop_back_val(); // And build our key from that. key = llvm::StringRef(raw_key, length); } // Check that in fact we popped every raw key into our main keys. for (const auto& raw_keys : raw_keys_storage) { CARBON_CHECK(raw_keys.empty()); } return keys; }()}; static absl::NoDestructor> raw_ptr_keys(llvm::map_range( llvm::seq(MaxNumKeys), [](int index) { return new int{index}; })); static absl::NoDestructor> raw_int_keys(llvm::map_range( llvm::seq(MaxNumKeys), [](int index) { return index + 1; })); template static absl::NoDestructor>> raw_low_zero_bit_int_keys( llvm::map_range(llvm::seq(MaxNumKeys), [](int index) { return LowZeroBitInt(index + 1); })); namespace { // Allow generically dispatching over the specific key types we build and // support. template auto GetRawKeys() -> llvm::ArrayRef { if constexpr (std::is_same_v) { return *raw_str_keys; } else if constexpr (std::is_pointer_v) { return *raw_ptr_keys; } else if constexpr (std::is_same_v>) { return *raw_low_zero_bit_int_keys<12>; } else if constexpr (std::is_same_v>) { return *raw_low_zero_bit_int_keys<24>; } else if constexpr (std::is_same_v>) { return *raw_low_zero_bit_int_keys<32>; } else { return *raw_int_keys; } } template static absl::NoDestructor< std::map, llvm::SmallVector>> lookup_keys_storage; // Given a particular table keys size and lookup keys size, provide an array ref // to a shuffled set of lookup keys. // // Because different table sizes pull from different sub-ranges of our raw keys, // we need to compute a distinct set of random keys in the table to use for // lookups depending on the table size. And we also want to have an even // distribution of key *sizes* throughout the lookup keys, and so we can't // compute a single lookup keys array of the maximum size. Instead we need to // compute a distinct special set of lookup keys for each pair of table and // lookup size, and then shuffle that specific set into a random sequence that // is returned. This function memoizes this sequence for each pair of sizes. template auto GetShuffledLookupKeys(ssize_t table_keys_size, ssize_t lookup_keys_size) -> llvm::ArrayRef { // The raw keys aren't shuffled and round-robin through the sizes. We want to // keep the total size of lookup keys used exactly the same across runs. So // for a given size we always take the leading sequence from the raw keys for // that size, duplicating as needed to get the desired lookup sequence size, // and then shuffle the keys in that sequence to end up with a random sequence // of keys. We store each of these shuffled sequences in a map to avoid // repeatedly computing them. llvm::SmallVector& lookup_keys = (*lookup_keys_storage)[{table_keys_size, lookup_keys_size}]; if (lookup_keys.empty()) { auto raw_keys = GetRawKeys(); llvm::append_range( lookup_keys, llvm::map_range(llvm::seq(lookup_keys_size), [&](int index) { return raw_keys[index % table_keys_size]; })); absl::BitGen gen; Shuffle(lookup_keys, gen); } CARBON_CHECK(static_cast(lookup_keys.size()) == lookup_keys_size); return lookup_keys; } } // namespace template auto GetKeysAndMissKeys(ssize_t table_keys_size) -> std::pair, llvm::ArrayRef> { CARBON_CHECK(table_keys_size <= MaxNumKeys); // The raw keys aren't shuffled and round-robin through the sizes. Take the // tail of this sequence and shuffle it to form a random set of miss keys with // a consistent total size. static absl::NoDestructor> miss_keys{[] { llvm::SmallVector keys(GetRawKeys().take_back(NumOtherKeys)); CARBON_CHECK(keys.size() == NumOtherKeys); absl::BitGen gen; Shuffle(keys, gen); return keys; }()}; return {GetRawKeys().slice(0, table_keys_size), *miss_keys}; } template auto GetKeysAndMissKeys(ssize_t size) -> std::pair, llvm::ArrayRef>; template auto GetKeysAndMissKeys(ssize_t size) -> std::pair, llvm::ArrayRef>; template auto GetKeysAndMissKeys(ssize_t size) -> std::pair, llvm::ArrayRef>; template auto GetKeysAndMissKeys>(ssize_t size) -> std::pair>, llvm::ArrayRef>>; template auto GetKeysAndMissKeys>(ssize_t size) -> std::pair>, llvm::ArrayRef>>; template auto GetKeysAndMissKeys>(ssize_t size) -> std::pair>, llvm::ArrayRef>>; template auto GetKeysAndHitKeys(ssize_t table_keys_size, ssize_t lookup_keys_size) -> std::pair, llvm::ArrayRef> { CARBON_CHECK(table_keys_size <= MaxNumKeys); CARBON_CHECK(lookup_keys_size <= MaxNumKeys); return {GetRawKeys().slice(0, table_keys_size), GetShuffledLookupKeys(table_keys_size, lookup_keys_size)}; } template auto GetKeysAndHitKeys(ssize_t size, ssize_t lookup_keys_size) -> std::pair, llvm::ArrayRef>; template auto GetKeysAndHitKeys(ssize_t size, ssize_t lookup_keys_size) -> std::pair, llvm::ArrayRef>; template auto GetKeysAndHitKeys(ssize_t size, ssize_t lookup_keys_size) -> std::pair, llvm::ArrayRef>; template auto GetKeysAndHitKeys>(ssize_t size, ssize_t lookup_keys_size) -> std::pair>, llvm::ArrayRef>>; template auto GetKeysAndHitKeys>(ssize_t size, ssize_t lookup_keys_size) -> std::pair>, llvm::ArrayRef>>; template auto GetKeysAndHitKeys>(ssize_t size, ssize_t lookup_keys_size) -> std::pair>, llvm::ArrayRef>>; template auto DumpHashStatistics(llvm::ArrayRef keys) -> void { if (keys.size() < GroupSize) { return; } // The hash table load factor is 7/8ths, so we want to add 1/7th of our // current size, subtract one, and pick the next power of two to get the power // of two where 7/8ths is greater than or equal to the incoming key size. ssize_t expected_size = llvm::NextPowerOf2(keys.size() + (keys.size() / 7) - 1); constexpr int GroupShift = llvm::ConstantLog2(); size_t mask = ComputeProbeMaskFromSize(expected_size); uint64_t salt = ComputeSeed(); auto get_hash_index = [mask, salt](auto x) -> ssize_t { auto [hash_index, _] = HashValue(x, salt).template ExtractIndexAndTag<7>(); return (hash_index & mask) >> GroupShift; }; std::vector> grouped_key_indices(expected_size >> GroupShift); for (auto [i, k] : llvm::enumerate(keys)) { ssize_t hash_index = get_hash_index(k); CARBON_CHECK(hash_index < (expected_size >> GroupShift), "{0}", hash_index); grouped_key_indices[hash_index].push_back(i); } ssize_t max_group_index = llvm::max_element(grouped_key_indices, [](const auto& lhs, const auto& rhs) { return lhs.size() < rhs.size(); }) - grouped_key_indices.begin(); // If the max number of collisions on the index is less than or equal to the // group size, there shouldn't be any necessary probing (outside of deletion) // and so this isn't interesting, skip printing. if (grouped_key_indices[max_group_index].size() <= GroupSize) { return; } llvm::errs() << "keys: " << keys.size() << " groups: " << grouped_key_indices.size() << "\n" << "max group index: " << llvm::formatv("{0x8}", max_group_index) << " collisions: " << grouped_key_indices[max_group_index].size() << "\n"; for (auto i : llvm::ArrayRef(grouped_key_indices[max_group_index]) .take_front(2 * GroupSize)) { auto k = keys[i]; auto hash = static_cast(HashValue(k, salt)); llvm::errs() << " key: " << k << " salt: " << llvm::formatv("{0:x16}", salt) << " hash: " << llvm::formatv("{0:x16}", hash) << "\n"; } } template auto DumpHashStatistics(llvm::ArrayRef keys) -> void; template auto DumpHashStatistics(llvm::ArrayRef> keys) -> void; template auto DumpHashStatistics(llvm::ArrayRef> keys) -> void; template auto DumpHashStatistics(llvm::ArrayRef> keys) -> void; template auto DumpHashStatistics(llvm::ArrayRef keys) -> void; template auto DumpHashStatistics(llvm::ArrayRef keys) -> void; } // namespace Carbon::RawHashtable