
Introduce a Carbon hashing framework. (#3327)

# Overview

This is a latency-optimized hashing framework based on Abseil's and
others. At its core it uses both a plain 64-bit multiply and a 64-bit
multiply that captures the low and high 64-bit halves of the 128-bit
result and XORs them together. These are the primitives used in FxHash
and Abseil respectively, although both appear in other hash functions as
well.
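
As a rough sketch (not the exact internal routines, which differ in operand
preparation and bit ordering), the two primitives look roughly like this:

```cpp
#include <cstdint>

// Sketch of the FxHash-style primitive: a single low-64-bit multiply by an odd
// constant (here Knuth's multiplicative constant, also used in the header
// below). Cheap, but high input bits only influence high output bits.
inline auto WeakMixSketch(uint64_t value) -> uint64_t {
  return value * 0x9e37'79b9'7f4a'7c15U;
}

// Sketch of the Abseil-style primitive: a full 64x64 -> 128-bit multiply whose
// high and low halves are folded together with XOR, letting every input bit
// influence the final 64-bit result.
inline auto MixSketch(uint64_t lhs, uint64_t rhs) -> uint64_t {
  __uint128_t product = static_cast<__uint128_t>(lhs) * rhs;
  return static_cast<uint64_t>(product) ^ static_cast<uint64_t>(product >> 64);
}
```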

The implementation has been *substantially* optimized for short inputs
and latency over quality. As a result, this function does not remotely
pass the SMHasher quality tests. However, basic collisions are rare, and
I've included a small subset of the SMHasher collision testing directly
to make sure the quality doesn't slip too far inadvertently.

The customization framework is roughly similar to Abseil's and LLVM's
but has been simplified significantly, inspired in some respects by the
AHash API design and in others by my experience that all
performance-sensitive hashing implementations need to work at a very low
level to hit their performance targets. The abstractions are stripped
down to facilitate this.

# Details of the performance optimization

This function is 2x - 4x faster than LLVM's on small inputs, and up to
2x faster than Abseil. Significant effort has gone into optimizing short
strings in particular compared to Abseil.

Small integer and pointer hashing is also faster than Abseil's by
leveraging a lower-quality 64-bit multiply in some cases, inspired by
FxHash. One consequence is that this routine is particularly fast for
32-bit integers.
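
For illustration only (the real fast path differs in details), a 32-bit key
can ride entirely on the weaker single-multiply path, reusing the
`WeakMixSketch` above:

```cpp
// Illustrative sketch: a 32-bit key occupies only the low bits of the operand,
// with the seed filling out the rest, so a single low-64-bit multiply already
// gives reasonable diffusion at very low latency for hash table use.
inline auto HashUInt32Sketch(uint32_t key, uint64_t seed) -> uint64_t {
  return WeakMixSketch(seed ^ static_cast<uint64_t>(key));
}
```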

The short string improvements largely come from packing more of the
bytes of the string into as few multiplies as possible. While this fails
to mix the bits sufficiently to hit SMHasher's strict avalanche criteria
and does leave some collision windows, it provides dramatic latency
improvements. Some of these techniques come from Abseil's own bulk
hashing routine but are re-applied here. Others are novel, for example
using the small size to sample uniformly random data and efficiently
handle the very small number of bits that need to be hashed.
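
As a hedged sketch of both ideas (the function and parameter names here are
illustrative, not the actual implementation), a 4-to-8-byte input can be
folded into a single multiply using the `MixSketch` above, with the length
selecting which slice of the 64-byte static random pool participates:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

// Illustrative sketch: hash a short input of 4..8 bytes with one 128-bit
// multiply. Reading the first and last 4 bytes (possibly overlapping) captures
// every byte without branching on the exact size, and the size itself selects
// a byte offset into the random pool so that different lengths of otherwise
// identical data still hash differently.
inline auto HashShortSketch(const std::byte* data, size_t size, uint64_t seed,
                            const uint64_t (&pool)[8]) -> uint64_t {
  uint32_t head;
  uint32_t tail;
  std::memcpy(&head, data, 4);
  std::memcpy(&tail, data + size - 4, 4);
  uint64_t packed = (static_cast<uint64_t>(head) << 32) | tail;

  // Sample 8 bytes of the random pool at an offset given by the size.
  uint64_t sampled;
  std::memcpy(&sampled, reinterpret_cast<const unsigned char*>(pool) + size,
              sizeof(sampled));

  return MixSketch(packed ^ sampled, seed);
}
```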

The other observed improvement comes from diligent handling of pairs and
tuples, fairly aggressively turning things into integers. Some of the
comparisons with Abseil aren't realistic as the Abseil hash table does
some of these mappings before hashing. I've done this directly in the
hash function as that seems cleaner.
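
For example, a user-defined key type that is just a pair of integers can route
both members through a single variadic `Hash` call using the `Hasher` API added
in `common/hashing.h` below (the key type here is hypothetical):

```cpp
#include <cstdint>

#include "common/hashing.h"

// Hypothetical key type used only for illustration.
struct MyKey {
  int32_t bucket;
  uint64_t id;

  // ADL-discovered customization point; both members are passed in one call so
  // the hasher can pack them into as few multiplies as possible.
  friend auto CarbonHashValue(const MyKey& key, uint64_t seed)
      -> Carbon::HashCode {
    Carbon::Hasher hasher(seed);
    hasher.Hash(key.bucket, key.id);
    return static_cast<Carbon::HashCode>(hasher);
  }
};
```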

For long strings, the performance is comparable or a bit better than
Abseil, and significantly better than LLVM's hash function.

Overall, for short inputs this is hoped to be the fastest hash function
that still gets "just enough" mixing for modern hash tables to perform
well.

# Details of the quality vs. latency tradeoff

A key insight is that modern hash tables don't need especially
high-quality hash functions, but do benefit from something beyond the
identity function. That isn't the target of SMHasher or other
quality-assessing tools, which has resulted in unnecessarily aggressive
hashing for any function actually evaluated against them. Many hash
functions turn off their high-quality, SMHasher-evaluated implementations
for integer or pointer keys to recover latency and performance (AHash,
for example), but the same performance-oriented design applies beyond
these narrow types, for example to short strings.

However, a consequence is that there are serious limits to the quality
of the hash function. The avalanche test is failed hilariously, but in
the exact same ways that Abseil itself fails it for integer keys. There
are also real collision spaces. For example, for 16-byte strings,
there is one 64-bit value for the first 8 bytes that will have the same
hash regardless of the other 8 bytes of the string. Some minor effort is
taken to make this pattern unlikely to be a practical problem, but it is
a clear theoretical weakness.
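
Concretely, the weakness follows from the zero-operand behavior of the 128-bit
multiply-and-fold, using the illustrative `MixSketch` from above:

```cpp
// Illustration only: when the first 8 bytes of a 16-byte string happen to
// cancel the random key XORed into that operand, the multiply sees a zero
// operand and the second 8 bytes are discarded entirely.
inline auto SixteenByteSketch(uint64_t first8, uint64_t second8, uint64_t key,
                              uint64_t seed) -> uint64_t {
  // If `first8 == key`, this is MixSketch(0, second8 ^ seed) == 0 for *every*
  // value of `second8`, which is the collision window described above.
  return MixSketch(first8 ^ key, second8 ^ seed);
}
```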

It also means that this hash function couldn't be further from providing
any hash-flooding DoS attack protection -- I expect it to be trivially
easy to attack in this way by a motivated adversary. Defending against
these attacks is defined as out-of-scope, in large part because even
attempts that have made a compelling effort to address these issues such
as HighwayHash have found serious limits. Instead, this takes a
principled position that any such defense should be provided entirely at
the data structure level with a strong worst-case bound rather than
through strengthening the hash function.

# Future work

A subsequent PR will introduce a hash table inspired very heavily by the
design of Abseil's "SwissTable" and using this hash function. The goal
is to provide a significant improvement to hot hash tables such as the
identifier table in the lexer of Carbon's toolchain.

# Detailed benchmark data

The benchmarks introduced are heavily inspired by the latency
benchmarking of hash functions in Abseil. I've adapted them to fit
better into Carbon's coding style and to try to have more stable results
with broader coverage of types and string sizes.
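
For context, the core of a latency-style (rather than throughput-style)
measurement chains each hash's result into the next seed so iterations can't
overlap. A hedged sketch, with `MakeRandomValues` as a hypothetical helper and
details differing from the actual benchmark code:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

#include <benchmark/benchmark.h>

#include "common/hashing.h"

// Hypothetical helper that builds a pool of random values of type T.
template <typename T>
auto MakeRandomValues() -> std::vector<T>;

// Hedged sketch of a latency benchmark: feeding the previous hash code back in
// as the next seed serializes the chain, so the measured time approximates
// per-hash latency rather than throughput.
template <typename T>
void BM_LatencySketch(benchmark::State& state) {
  std::vector<T> values = MakeRandomValues<T>();
  uint64_t seed = 42;
  size_t i = 0;
  for (auto _ : state) {
    Carbon::HashCode code = Carbon::HashValue(values[i], seed);
    seed = static_cast<uint64_t>(code);
    i = (i + 1) % values.size();
    benchmark::DoNotOptimize(seed);
  }
}
BENCHMARK_TEMPLATE(BM_LatencySketch, uint64_t);
```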

Running the benchmarks directly gives horizontal comparisons across
different hash functions. That can be hard to read, so here are *just*
the newly introduced hash function's benchmark results on an AMD server:

```
BM_LatencyHash<RandValues<uint8_t>, CarbonHashBench>                          3.11ns ± 1%
BM_LatencyHash<RandValues<uint16_t>, CarbonHashBench>                         3.11ns ± 1%
BM_LatencyHash<RandValues<std::pair<uint8_t, uint8_t>>, CarbonHashBench>      4.11ns ± 1%
BM_LatencyHash<RandValues<uint32_t>, CarbonHashBench>                         3.12ns ± 1%
BM_LatencyHash<RandValues<std::pair<uint16_t, uint16_t>>, CarbonHashBench>    4.13ns ± 1%
BM_LatencyHash<RandValues<uint64_t>, CarbonHashBench>                         3.16ns ± 2%
BM_LatencyHash<RandValues<int*>, CarbonHashBench>                             3.16ns ± 2%
BM_LatencyHash<RandValues<std::pair<uint32_t, uint32_t>>, CarbonHashBench>    4.03ns ± 2%
BM_LatencyHash<RandValues<std::pair<uint64_t, uint32_t>>, CarbonHashBench>    4.04ns ± 1%
BM_LatencyHash<RandValues<std::pair<uint32_t, uint64_t>>, CarbonHashBench>    4.34ns ± 2%
BM_LatencyHash<RandValues<std::pair<int*, uint32_t>>, CarbonHashBench>        4.04ns ± 1%
BM_LatencyHash<RandValues<std::pair<uint32_t, int*>>, CarbonHashBench>        4.34ns ± 2%
BM_LatencyHash<RandValues<__uint128_t>, CarbonHashBench>                      4.33ns ± 1%
BM_LatencyHash<RandValues<std::pair<uint64_t, uint64_t>>, CarbonHashBench>    4.33ns ± 1%
BM_LatencyHash<RandValues<std::pair<int*, int*>>, CarbonHashBench>            4.34ns ± 1%
BM_LatencyHash<RandValues<std::pair<uint64_t, int*>>, CarbonHashBench>        4.33ns ± 1%
BM_LatencyHash<RandValues<std::pair<int*, uint64_t>>, CarbonHashBench>        4.33ns ± 1%
BM_LatencyHash<RandStrings< true, 4>, CarbonHashBench>                        1.95ns ± 4%
BM_LatencyHash<RandStrings< true, 8>, CarbonHashBench>                        1.70ns ± 3%
BM_LatencyHash<RandStrings< true, 16>, CarbonHashBench>                       3.52ns ± 3%
BM_LatencyHash<RandStrings< true, 32>, CarbonHashBench>                       4.46ns ± 2%
BM_LatencyHash<RandStrings< true, 64>, CarbonHashBench>                       7.69ns ± 1%
BM_LatencyHash<RandStrings< true, 256>, CarbonHashBench>                      14.8ns ± 1%
BM_LatencyHash<RandStrings< true, 512>, CarbonHashBench>                      21.5ns ± 1%
BM_LatencyHash<RandStrings< true, 1024>, CarbonHashBench>                     34.6ns ± 0%
BM_LatencyHash<RandStrings< true, 2048>, CarbonHashBench>                     63.1ns ± 1%
BM_LatencyHash<RandStrings< true, 4096>, CarbonHashBench>                      118ns ± 1%
BM_LatencyHash<RandStrings< true, 8192>, CarbonHashBench>                      225ns ± 1%
```

And on an ARM server:

```
BM_LatencyHash<RandValues<uint8_t>, CarbonHashBench>                          5.28ns ± 0%
BM_LatencyHash<RandValues<uint16_t>, CarbonHashBench>                         5.29ns ± 0%
BM_LatencyHash<RandValues<std::pair<uint8_t, uint8_t>>, CarbonHashBench>      7.02ns ± 0%
BM_LatencyHash<RandValues<uint32_t>, CarbonHashBench>                         5.34ns ± 1%
BM_LatencyHash<RandValues<std::pair<uint16_t, uint16_t>>, CarbonHashBench>    7.07ns ± 4%
BM_LatencyHash<RandValues<uint64_t>, CarbonHashBench>                         5.36ns ± 2%
BM_LatencyHash<RandValues<int*>, CarbonHashBench>                             5.36ns ± 2%
BM_LatencyHash<RandValues<std::pair<uint32_t, uint32_t>>, CarbonHashBench>    7.19ns ± 3%
BM_LatencyHash<RandValues<std::pair<uint64_t, uint32_t>>, CarbonHashBench>    7.29ns ± 2%
BM_LatencyHash<RandValues<std::pair<uint32_t, uint64_t>>, CarbonHashBench>    7.31ns ± 4%
BM_LatencyHash<RandValues<std::pair<int*, uint32_t>>, CarbonHashBench>        7.29ns ± 2%
BM_LatencyHash<RandValues<std::pair<uint32_t, int*>>, CarbonHashBench>        7.31ns ± 4%
BM_LatencyHash<RandValues<__uint128_t>, CarbonHashBench>                      8.69ns ± 3%
BM_LatencyHash<RandValues<std::pair<uint64_t, uint64_t>>, CarbonHashBench>    8.69ns ± 3%
BM_LatencyHash<RandValues<std::pair<int*, int*>>, CarbonHashBench>            8.69ns ± 3%
BM_LatencyHash<RandValues<std::pair<uint64_t, int*>>, CarbonHashBench>        8.69ns ± 3%
BM_LatencyHash<RandValues<std::pair<int*, uint64_t>>, CarbonHashBench>        8.69ns ± 3%
BM_LatencyHash<RandStrings< true, 4>, CarbonHashBench>                        2.64ns ± 2%
BM_LatencyHash<RandStrings< true, 8>, CarbonHashBench>                        2.90ns ± 4%
BM_LatencyHash<RandStrings< true, 16>, CarbonHashBench>                       6.14ns ± 1%
BM_LatencyHash<RandStrings< true, 32>, CarbonHashBench>                       8.27ns ± 1%
BM_LatencyHash<RandStrings< true, 64>, CarbonHashBench>                       13.8ns ± 0%
BM_LatencyHash<RandStrings< true, 256>, CarbonHashBench>                      31.2ns ± 0%
BM_LatencyHash<RandStrings< true, 512>, CarbonHashBench>                      49.9ns ± 0%
BM_LatencyHash<RandStrings< true, 1024>, CarbonHashBench>                     86.9ns ± 0%
BM_LatencyHash<RandStrings< true, 2048>, CarbonHashBench>                      163ns ± 0%
BM_LatencyHash<RandStrings< true, 4096>, CarbonHashBench>                      312ns ± 0%
BM_LatencyHash<RandStrings< true, 8192>, CarbonHashBench>                      610ns ± 0%
```

I don't have the same nice statistical multi-run error bars, but here is
one run from my M1 MacBook:

```
BM_LatencyHash<RandValues<uint8_t>, CarbonHashBench>                             3.89 ns
BM_LatencyHash<RandValues<uint16_t>, CarbonHashBench>                            3.87 ns
BM_LatencyHash<RandValues<std::pair<uint8_t, uint8_t>>, CarbonHashBench>         4.39 ns
BM_LatencyHash<RandValues<uint32_t>, CarbonHashBench>                            3.93 ns
BM_LatencyHash<RandValues<std::pair<uint16_t, uint16_t>>, CarbonHashBench>       4.98 ns
BM_LatencyHash<RandValues<uint64_t>, CarbonHashBench>                            3.87 ns
BM_LatencyHash<RandValues<int*>, CarbonHashBench>                                3.87 ns
BM_LatencyHash<RandValues<std::pair<uint32_t, uint32_t>>, CarbonHashBench>       4.86 ns
BM_LatencyHash<RandValues<std::pair<uint64_t, uint32_t>>, CarbonHashBench>       4.43 ns
BM_LatencyHash<RandValues<std::pair<uint32_t, uint64_t>>, CarbonHashBench>       4.41 ns
BM_LatencyHash<RandValues<std::pair<int*, uint32_t>>, CarbonHashBench>           4.44 ns
BM_LatencyHash<RandValues<std::pair<uint32_t, int*>>, CarbonHashBench>           4.69 ns
BM_LatencyHash<RandValues<__uint128_t>, CarbonHashBench>                         4.33 ns
BM_LatencyHash<RandValues<std::pair<uint64_t, uint64_t>>, CarbonHashBench>       4.38 ns
BM_LatencyHash<RandValues<std::pair<int*, int*>>, CarbonHashBench>               4.34 ns
BM_LatencyHash<RandValues<std::pair<uint64_t, int*>>, CarbonHashBench>           4.35 ns
BM_LatencyHash<RandValues<std::pair<int*, uint64_t>>, CarbonHashBench>           4.38 ns
BM_LatencyHash<RandStrings< true, 4>, CarbonHashBench>                           1.15 ns
BM_LatencyHash<RandStrings< true, 8>, CarbonHashBench>                          0.973 ns
BM_LatencyHash<RandStrings< true, 16>, CarbonHashBench>                          3.03 ns
BM_LatencyHash<RandStrings< true, 32>, CarbonHashBench>                          3.97 ns
BM_LatencyHash<RandStrings< true, 64>, CarbonHashBench>                          6.64 ns
BM_LatencyHash<RandStrings< true, 256>, CarbonHashBench>                         12.5 ns
BM_LatencyHash<RandStrings< true, 512>, CarbonHashBench>                         17.9 ns
BM_LatencyHash<RandStrings< true, 1024>, CarbonHashBench>                        27.9 ns
BM_LatencyHash<RandStrings< true, 2048>, CarbonHashBench>                        48.1 ns
BM_LatencyHash<RandStrings< true, 4096>, CarbonHashBench>                        87.3 ns
BM_LatencyHash<RandStrings< true, 8192>, CarbonHashBench>                         166 ns
```

Here, I internally replaced the Carbon hash function with Abseil's for
the "before" runs, restored it for the "after" runs, and computed the
delta for each benchmark. This effectively shows the speed-up over
Abseil (lower time -> lower latency -> speed-up -> good) on an AMD
server:

```
BM_LatencyHash<RandValues<uint8_t>, CarbonHashBench>                          4.00ns ± 1%  3.10ns ± 0%  -22.45%  (p=0.000 n=20+15)
BM_LatencyHash<RandValues<uint16_t>, CarbonHashBench>                         4.01ns ± 1%  3.10ns ± 1%  -22.64%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<std::pair<uint8_t, uint8_t>>, CarbonHashBench>      6.25ns ± 1%  4.10ns ± 1%  -34.30%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<uint32_t>, CarbonHashBench>                         4.02ns ± 1%  3.12ns ± 1%  -22.50%  (p=0.000 n=19+19)
BM_LatencyHash<RandValues<std::pair<uint16_t, uint16_t>>, CarbonHashBench>    6.25ns ± 1%  4.11ns ± 1%  -34.20%  (p=0.000 n=20+19)
BM_LatencyHash<RandValues<uint64_t>, CarbonHashBench>                         4.03ns ± 1%  3.14ns ± 1%  -22.17%  (p=0.000 n=19+19)
BM_LatencyHash<RandValues<int*>, CarbonHashBench>                             5.95ns ± 1%  3.14ns ± 1%  -47.24%  (p=0.000 n=20+18)
BM_LatencyHash<RandValues<std::pair<uint32_t, uint32_t>>, CarbonHashBench>    6.04ns ± 1%  4.01ns ± 1%  -33.64%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint64_t, uint32_t>>, CarbonHashBench>    5.96ns ± 1%  4.02ns ± 1%  -32.51%  (p=0.000 n=18+20)
BM_LatencyHash<RandValues<std::pair<uint32_t, uint64_t>>, CarbonHashBench>    5.93ns ± 1%  4.30ns ± 1%  -27.56%  (p=0.000 n=20+17)
BM_LatencyHash<RandValues<std::pair<int*, uint32_t>>, CarbonHashBench>        7.97ns ± 1%  4.02ns ± 1%  -49.50%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint32_t, int*>>, CarbonHashBench>        7.98ns ± 1%  4.32ns ± 1%  -45.88%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<__uint128_t>, CarbonHashBench>                      4.40ns ± 2%  4.32ns ± 1%   -1.81%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint64_t, uint64_t>>, CarbonHashBench>    5.94ns ± 1%  4.32ns ± 1%  -27.25%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<std::pair<int*, int*>>, CarbonHashBench>            10.0ns ± 1%   4.3ns ± 1%  -56.56%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint64_t, int*>>, CarbonHashBench>        8.04ns ± 1%  4.32ns ± 1%  -46.29%  (p=0.000 n=20+19)
BM_LatencyHash<RandValues<std::pair<int*, uint64_t>>, CarbonHashBench>        7.95ns ± 1%  4.33ns ± 1%  -45.59%  (p=0.000 n=19+20)
BM_LatencyHash<RandStrings< true, 4>, CarbonHashBench>                        3.28ns ± 3%  1.93ns ± 4%  -41.19%  (p=0.000 n=18+20)
BM_LatencyHash<RandStrings< true, 8>, CarbonHashBench>                        3.05ns ± 3%  1.69ns ± 4%  -44.52%  (p=0.000 n=19+20)
BM_LatencyHash<RandStrings< true, 16>, CarbonHashBench>                       5.88ns ± 2%  3.50ns ± 3%  -40.42%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 32>, CarbonHashBench>                       8.92ns ± 1%  4.44ns ± 2%  -50.22%  (p=0.000 n=19+20)
BM_LatencyHash<RandStrings< true, 64>, CarbonHashBench>                       12.0ns ± 1%   7.7ns ± 1%  -36.16%  (p=0.000 n=18+20)
BM_LatencyHash<RandStrings< true, 256>, CarbonHashBench>                      18.8ns ± 0%  14.7ns ± 1%  -21.73%  (p=0.000 n=17+20)
BM_LatencyHash<RandStrings< true, 512>, CarbonHashBench>                      25.5ns ± 1%  21.4ns ± 1%  -16.18%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 1024>, CarbonHashBench>                     38.7ns ± 2%  34.5ns ± 1%  -10.78%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 2048>, CarbonHashBench>                     69.7ns ± 1%  62.8ns ± 1%   -9.88%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 4096>, CarbonHashBench>                      130ns ± 1%   117ns ± 1%   -9.45%  (p=0.000 n=20+19)
BM_LatencyHash<RandStrings< true, 8192>, CarbonHashBench>                      244ns ± 0%   225ns ± 1%   -8.11%  (p=0.000 n=17+20)
```

... and on an ARM server:

```
BM_LatencyHash<RandValues<uint8_t>, CarbonHashBench>                          6.48ns ± 1%  5.28ns ± 0%  -18.62%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<uint16_t>, CarbonHashBench>                         7.40ns ± 1%  5.29ns ± 1%  -28.45%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint8_t, uint8_t>>, CarbonHashBench>      10.4ns ± 0%   7.0ns ± 0%  -32.34%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<uint32_t>, CarbonHashBench>                         6.56ns ± 1%  5.32ns ± 1%  -18.95%  (p=0.000 n=20+19)
BM_LatencyHash<RandValues<std::pair<uint16_t, uint16_t>>, CarbonHashBench>    10.8ns ± 2%   7.0ns ± 1%  -34.89%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<uint64_t>, CarbonHashBench>                         6.71ns ± 3%  5.38ns ± 2%  -19.84%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<int*>, CarbonHashBench>                             10.3ns ± 3%   5.4ns ± 2%  -47.67%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<std::pair<uint32_t, uint32_t>>, CarbonHashBench>    10.9ns ± 2%   7.2ns ± 4%  -33.67%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<std::pair<uint64_t, uint32_t>>, CarbonHashBench>    10.7ns ± 4%   7.3ns ± 4%  -31.66%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint32_t, uint64_t>>, CarbonHashBench>    10.5ns ± 3%   7.3ns ± 4%  -30.71%  (p=0.000 n=20+19)
BM_LatencyHash<RandValues<std::pair<int*, uint32_t>>, CarbonHashBench>        14.1ns ± 3%   7.3ns ± 4%  -48.32%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<std::pair<uint32_t, int*>>, CarbonHashBench>        14.0ns ± 1%   7.3ns ± 4%  -47.95%  (p=0.000 n=19+19)
BM_LatencyHash<RandValues<__uint128_t>, CarbonHashBench>                      9.41ns ± 4%  8.68ns ± 4%   -7.71%  (p=0.000 n=19+19)
BM_LatencyHash<RandValues<std::pair<uint64_t, uint64_t>>, CarbonHashBench>    12.2ns ± 2%   8.7ns ± 4%  -28.81%  (p=0.000 n=18+19)
BM_LatencyHash<RandValues<std::pair<int*, int*>>, CarbonHashBench>            18.9ns ± 2%   8.7ns ± 4%  -54.17%  (p=0.000 n=17+19)
BM_LatencyHash<RandValues<std::pair<uint64_t, int*>>, CarbonHashBench>        15.6ns ± 2%   8.7ns ± 4%  -44.37%  (p=0.000 n=17+19)
BM_LatencyHash<RandValues<std::pair<int*, uint64_t>>, CarbonHashBench>        15.5ns ± 2%   8.7ns ± 4%  -44.08%  (p=0.000 n=18+19)
BM_LatencyHash<RandStrings< true, 4>, CarbonHashBench>                        5.89ns ± 2%  2.64ns ± 3%  -55.26%  (p=0.000 n=19+20)
BM_LatencyHash<RandStrings< true, 8>, CarbonHashBench>                        5.73ns ± 3%  2.88ns ± 3%  -49.71%  (p=0.000 n=18+20)
BM_LatencyHash<RandStrings< true, 16>, CarbonHashBench>                       10.1ns ± 1%   6.1ns ± 2%  -39.00%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 32>, CarbonHashBench>                       15.7ns ± 0%   8.3ns ± 1%  -47.27%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 64>, CarbonHashBench>                       21.2ns ± 0%  13.8ns ± 0%  -34.81%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 256>, CarbonHashBench>                      37.9ns ± 0%  31.2ns ± 0%  -17.77%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 512>, CarbonHashBench>                      56.8ns ± 0%  49.8ns ± 0%  -12.21%  (p=0.000 n=20+18)
BM_LatencyHash<RandStrings< true, 1024>, CarbonHashBench>                     93.8ns ± 0%  86.9ns ± 0%   -7.38%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 2048>, CarbonHashBench>                      174ns ± 0%   163ns ± 0%   -6.03%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 4096>, CarbonHashBench>                      330ns ± 0%   312ns ± 0%   -5.25%  (p=0.000 n=19+20)
BM_LatencyHash<RandStrings< true, 8192>, CarbonHashBench>                      641ns ± 0%   610ns ± 0%   -4.79%  (p=0.000 n=19+19)
```

This is the same as the above delta comparison, but with the "before"
being LLVM's hash function:

```
BM_LatencyHash<RandValues<uint8_t>, CarbonHashBench>                          6.85ns ± 1%  3.10ns ± 1%  -54.78%  (p=0.000 n=20+19)
BM_LatencyHash<RandValues<uint16_t>, CarbonHashBench>                         6.85ns ± 1%  3.10ns ± 1%  -54.78%  (p=0.000 n=20+19)
BM_LatencyHash<RandValues<std::pair<uint8_t, uint8_t>>, CarbonHashBench>      6.25ns ± 1%  4.09ns ± 1%  -34.58%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<uint32_t>, CarbonHashBench>                         6.87ns ± 1%  3.12ns ± 2%  -54.66%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint16_t, uint16_t>>, CarbonHashBench>    7.35ns ± 1%  4.10ns ± 1%  -44.20%  (p=0.000 n=20+19)
BM_LatencyHash<RandValues<uint64_t>, CarbonHashBench>                         7.34ns ± 1%  3.13ns ± 1%  -57.34%  (p=0.000 n=20+18)
BM_LatencyHash<RandValues<int*>, CarbonHashBench>                             7.33ns ± 1%  3.13ns ± 2%  -57.27%  (p=0.000 n=20+18)
BM_LatencyHash<RandValues<std::pair<uint32_t, uint32_t>>, CarbonHashBench>    7.27ns ± 1%  3.99ns ± 1%  -45.12%  (p=0.000 n=20+18)
BM_LatencyHash<RandValues<std::pair<uint64_t, uint32_t>>, CarbonHashBench>    14.5ns ± 1%   4.0ns ± 1%  -72.23%  (p=0.000 n=19+19)
BM_LatencyHash<RandValues<std::pair<uint32_t, uint64_t>>, CarbonHashBench>    14.6ns ± 1%   4.3ns ± 2%  -70.44%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<int*, uint32_t>>, CarbonHashBench>        14.5ns ± 1%   4.0ns ± 1%  -72.21%  (p=0.000 n=20+19)
BM_LatencyHash<RandValues<std::pair<uint32_t, int*>>, CarbonHashBench>        14.6ns ± 1%   4.3ns ± 1%  -70.46%  (p=0.000 n=20+18)
BM_LatencyHash<RandValues<__uint128_t>, CarbonHashBench>                      7.31ns ± 1%  4.33ns ± 1%  -40.81%  (p=0.000 n=18+20)
BM_LatencyHash<RandValues<std::pair<uint64_t, uint64_t>>, CarbonHashBench>    7.78ns ± 1%  4.32ns ± 1%  -44.45%  (p=0.000 n=18+20)
BM_LatencyHash<RandValues<std::pair<int*, int*>>, CarbonHashBench>            7.78ns ± 2%  4.33ns ± 1%  -44.42%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint64_t, int*>>, CarbonHashBench>        7.62ns ± 1%  4.32ns ± 1%  -43.24%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<int*, uint64_t>>, CarbonHashBench>        7.77ns ± 1%  4.33ns ± 1%  -44.34%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 4>, CarbonHashBench>                        8.15ns ± 3%  1.94ns ± 5%  -76.16%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 8>, CarbonHashBench>                        7.02ns ± 3%  1.69ns ± 4%  -75.94%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 16>, CarbonHashBench>                       7.83ns ± 2%  3.50ns ± 3%  -55.34%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 32>, CarbonHashBench>                       9.17ns ± 1%  4.43ns ± 2%  -51.65%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 64>, CarbonHashBench>                       11.3ns ± 1%   7.6ns ± 1%  -32.04%  (p=0.000 n=20+19)
BM_LatencyHash<RandStrings< true, 256>, CarbonHashBench>                      23.0ns ± 1%  14.7ns ± 1%  -36.14%  (p=0.000 n=20+19)
BM_LatencyHash<RandStrings< true, 512>, CarbonHashBench>                      32.9ns ± 0%  21.4ns ± 1%  -34.96%  (p=0.000 n=17+19)
BM_LatencyHash<RandStrings< true, 1024>, CarbonHashBench>                     52.2ns ± 1%  34.4ns ± 1%  -34.01%  (p=0.000 n=19+18)
BM_LatencyHash<RandStrings< true, 2048>, CarbonHashBench>                     92.1ns ± 1%  62.8ns ± 1%  -31.82%  (p=0.000 n=19+19)
BM_LatencyHash<RandStrings< true, 4096>, CarbonHashBench>                      169ns ± 1%   117ns ± 1%  -30.53%  (p=0.000 n=20+19)
BM_LatencyHash<RandStrings< true, 8192>, CarbonHashBench>                      319ns ± 1%   224ns ± 1%  -29.78%  (p=0.000 n=20+18)
```

... and on an ARM server:

```
BM_LatencyHash<RandValues<uint8_t>, CarbonHashBench>                          8.38ns ± 0%  5.27ns ± 0%  -37.04%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<uint16_t>, CarbonHashBench>                         8.39ns ± 1%  5.28ns ± 0%  -37.01%  (p=0.000 n=19+19)
BM_LatencyHash<RandValues<std::pair<uint8_t, uint8_t>>, CarbonHashBench>      8.07ns ± 0%  7.02ns ± 0%  -13.10%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<uint32_t>, CarbonHashBench>                         8.48ns ± 1%  5.32ns ± 1%  -37.25%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint16_t, uint16_t>>, CarbonHashBench>    9.34ns ± 2%  7.09ns ± 2%  -24.14%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<uint64_t>, CarbonHashBench>                         9.76ns ± 3%  5.37ns ± 2%  -44.98%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<int*>, CarbonHashBench>                             9.76ns ± 3%  5.37ns ± 2%  -44.98%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint32_t, uint32_t>>, CarbonHashBench>    10.1ns ± 2%   7.2ns ± 3%  -29.36%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<std::pair<uint64_t, uint32_t>>, CarbonHashBench>    11.9ns ± 2%   7.3ns ± 4%  -38.68%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<std::pair<uint32_t, uint64_t>>, CarbonHashBench>    11.3ns ± 2%   7.3ns ± 4%  -35.16%  (p=0.000 n=19+19)
BM_LatencyHash<RandValues<std::pair<int*, uint32_t>>, CarbonHashBench>        11.9ns ± 2%   7.3ns ± 4%  -38.68%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<std::pair<uint32_t, int*>>, CarbonHashBench>        11.3ns ± 2%   7.3ns ± 4%  -35.16%  (p=0.000 n=19+19)
BM_LatencyHash<RandValues<__uint128_t>, CarbonHashBench>                      10.3ns ± 2%   8.7ns ± 3%  -15.81%  (p=0.000 n=19+20)
BM_LatencyHash<RandValues<std::pair<uint64_t, uint64_t>>, CarbonHashBench>    11.6ns ± 3%   8.7ns ± 3%  -25.44%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<int*, int*>>, CarbonHashBench>            11.6ns ± 3%   8.7ns ± 3%  -25.44%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<uint64_t, int*>>, CarbonHashBench>        11.6ns ± 3%   8.7ns ± 3%  -25.44%  (p=0.000 n=20+20)
BM_LatencyHash<RandValues<std::pair<int*, uint64_t>>, CarbonHashBench>        11.6ns ± 3%   8.7ns ± 3%  -25.44%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 4>, CarbonHashBench>                        9.39ns ± 2%  2.66ns ± 3%  -71.66%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 8>, CarbonHashBench>                        10.7ns ± 3%   2.9ns ± 3%  -72.97%  (p=0.000 n=19+18)
BM_LatencyHash<RandStrings< true, 16>, CarbonHashBench>                       11.8ns ± 1%   6.1ns ± 2%  -47.75%  (p=0.000 n=19+20)
BM_LatencyHash<RandStrings< true, 32>, CarbonHashBench>                       13.9ns ± 1%   8.3ns ± 1%  -40.71%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 64>, CarbonHashBench>                       16.8ns ± 1%  13.8ns ± 0%  -17.83%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 256>, CarbonHashBench>                      31.7ns ± 1%  31.2ns ± 0%   -1.76%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 512>, CarbonHashBench>                      43.5ns ± 0%  49.8ns ± 0%  +14.56%  (p=0.000 n=18+20)
BM_LatencyHash<RandStrings< true, 1024>, CarbonHashBench>                     66.2ns ± 0%  86.9ns ± 0%  +31.39%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 2048>, CarbonHashBench>                      112ns ± 0%   163ns ± 0%  +46.09%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 4096>, CarbonHashBench>                      201ns ± 0%   312ns ± 0%  +55.49%  (p=0.000 n=20+20)
BM_LatencyHash<RandStrings< true, 8192>, CarbonHashBench>                      379ns ± 0%   610ns ± 0%  +61.08%  (p=0.000 n=20+20)
```

Note that there is a significant regression on long strings compared to
LLVM's hash function on the ARM server I have access to. This doesn't
show up on the M1 at all, and is likely specific to inadequate
throughput for the 64-bit multiply operations. This seems fine because a) our
priority is short strings, and b) the M1 and other ARM CPUs are
likely to improve here over time given the prevalent use of this core
technique. For example, Abseil's current hash algorithm has the same
long-string behavior (and performance bottleneck) on this server.

---------

Co-authored-by: josh11b <josh11b@users.noreply.github.com>
Co-authored-by: Geoff Romer <gromer@google.com>
Chandler Carruth, 2 years ago
parent
commit
f59a6cdbdd
5 changed files with 1986 additions and 1 deletion
  1. common/BUILD (+38 -1)
  2. common/hashing.cpp (+79 -0)
  3. common/hashing.h (+849 -0)
  4. common/hashing_benchmark.cpp (+301 -0)
  5. common/hashing_test.cpp (+719 -0)

+ 38 - 1
common/BUILD

@@ -2,7 +2,7 @@
 # Exceptions. See /LICENSE for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
 
 package(default_visibility = ["//visibility:public"])
 
@@ -112,6 +112,43 @@ cc_test(
     ],
 )
 
+cc_library(
+    name = "hashing",
+    srcs = ["hashing.cpp"],
+    hdrs = ["hashing.h"],
+    deps = [
+        ":check",
+        ":ostream",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
+cc_test(
+    name = "hashing_test",
+    srcs = ["hashing_test.cpp"],
+    deps = [
+        ":hashing",
+        "//testing/base:gtest_main",
+        "//testing/base:test_raw_ostream",
+        "@com_google_googletest//:gtest",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
+cc_binary(
+    name = "hashing_benchmark",
+    testonly = 1,
+    srcs = ["hashing_benchmark.cpp"],
+    deps = [
+        ":check",
+        ":hashing",
+        "@com_github_google_benchmark//:benchmark_main",
+        "@com_google_absl//absl/hash",
+        "@com_google_absl//absl/random",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
 cc_library(
     name = "indirect_value",
     hdrs = ["indirect_value.h"],

+ 79 - 0
common/hashing.cpp

@@ -0,0 +1,79 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "common/hashing.h"
+
+namespace Carbon {
+
+auto Hasher::HashSizedBytesLarge(llvm::ArrayRef<std::byte> bytes) -> void {
+  const std::byte* data_ptr = bytes.data();
+  const ssize_t size = bytes.size();
+  CARBON_DCHECK(size > 32);
+
+  // If we have 64 bytes or more, we're going to handle two 32-byte chunks at a
+  // time using a simplified version of the main algorithm. This is based
+  // heavily on the 64-byte and larger processing approach used by Abseil. The
+  // goal is to mix the input data using as few multiplies (or other operations)
+  // as we can and with as much [ILP][1] as we can. The ILP comes largely from
+  // creating parallel structures to the operations.
+  //
+  // [1]: https://en.wikipedia.org/wiki/Instruction-level_parallelism
+  auto mix32 = [](const std::byte* data_ptr, uint64_t buffer, uint64_t random0,
+                  uint64_t random1) {
+    uint64_t a = Read8(data_ptr);
+    uint64_t b = Read8(data_ptr + 8);
+    uint64_t c = Read8(data_ptr + 16);
+    uint64_t d = Read8(data_ptr + 24);
+    uint64_t m0 = Mix(a ^ random0, b ^ buffer);
+    uint64_t m1 = Mix(c ^ random1, d ^ buffer);
+    return (m0 ^ m1);
+  };
+
+  // Prefetch the first bytes into cache.
+  __builtin_prefetch(data_ptr, 0 /* read */, 0 /* discard after next use */);
+
+  uint64_t buffer0 = buffer ^ StaticRandomData[0];
+  uint64_t buffer1 = buffer ^ StaticRandomData[2];
+  const std::byte* tail_32b_ptr = data_ptr + (size - 32);
+  const std::byte* tail_16b_ptr = data_ptr + (size - 16);
+  const std::byte* end_ptr = data_ptr + (size - 64);
+  while (data_ptr < end_ptr) {
+    // Prefetch the next 64-bytes while we process the current 64-bytes.
+    __builtin_prefetch(data_ptr + 64, 0 /* read */,
+                       0 /* discard after next use */);
+
+    buffer0 =
+        mix32(data_ptr, buffer0, StaticRandomData[4], StaticRandomData[5]);
+    buffer1 =
+        mix32(data_ptr + 32, buffer1, StaticRandomData[6], StaticRandomData[7]);
+
+    data_ptr += 64;
+  }
+
+  // If we haven't reached our 32-byte tail pointer, consume another 32-bytes
+  // directly.
+  if (data_ptr < tail_32b_ptr) {
+    buffer0 =
+        mix32(data_ptr, buffer0, StaticRandomData[4], StaticRandomData[5]);
+    data_ptr += 32;
+  }
+
+  if (data_ptr < tail_16b_ptr) {
+    // We have more than 16-bytes in the tail so use a full 32-byte mix from the
+    // 32-byte tail pointer.
+    buffer1 =
+        mix32(tail_32b_ptr, buffer1, StaticRandomData[6], StaticRandomData[7]);
+  } else {
+    // 16-bytes or less in the tail, do something more minimal instead of a full
+    // 32-byte mix. As this only involves a single multiply, we don't decompose
+    // further even when the tail is (much) shorter.
+    buffer1 = Mix(Read8(tail_16b_ptr) ^ StaticRandomData[6],
+                  Read8(tail_16b_ptr + 8) ^ buffer1);
+  }
+
+  buffer = buffer0 ^ buffer1;
+  HashDense(size);
+}
+
+}  // namespace Carbon

+ 849 - 0
common/hashing.h

@@ -0,0 +1,849 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_COMMON_HASHING_H_
+#define CARBON_COMMON_HASHING_H_
+
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include "common/check.h"
+#include "common/ostream.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MathExtras.h"
+
+#ifdef __ARM_ACLE
+#include <arm_acle.h>
+#endif
+
+namespace Carbon {
+
+// A 64-bit hash code produced by `Carbon::HashValue`.
+//
+// This provides methods for extracting high-quality bits from the hash code
+// quickly.
+//
+// This class can also be a hashing input when recursively hashing more complex
+// data structures.
+class HashCode : public Printable<HashCode> {
+ public:
+  HashCode() = default;
+
+  constexpr explicit HashCode(uint64_t value) : value_(value) {}
+
+  friend constexpr auto operator==(HashCode lhs, HashCode rhs) -> bool {
+    return lhs.value_ == rhs.value_;
+  }
+  friend constexpr auto operator!=(HashCode lhs, HashCode rhs) -> bool {
+    return lhs.value_ != rhs.value_;
+  }
+
+  // Extracts an index from the hash code that is in the range [0, size). The
+  // size and returned index are `ssize_t` for performance reasons. This is
+  // useful when using the hash code to index a hash table. It prioritizes
+  // computing the index from the bits in the hash code with the highest
+  // entropy.
+  constexpr auto ExtractIndex(ssize_t size) -> ssize_t;
+
+  // Extracts an index and a fixed `N`-bit tag from the hash code.
+  //
+  // This will both minimize overlap between the tag and the index as well as
+  // maximizing the entropy of the bits that contribute to each.
+  //
+  // The index will be in the range [0, `size`). The `size` must be a power of
+  // two, and `N` must be in the range [1, 32].
+  template <int N>
+  constexpr auto ExtractIndexAndTag(ssize_t size)
+      -> std::pair<ssize_t, uint32_t>;
+
+  // Extract the full 64-bit hash code as an integer.
+  //
+  // The methods above should be preferred rather than directly manipulating
+  // this integer. This is provided primarily to enable Merkle-tree hashing or
+  // other recursive hashing where that is needed or more efficient.
+  explicit operator uint64_t() const { return value_; }
+
+  auto Print(llvm::raw_ostream& out) const -> void {
+    out << llvm::formatv("{0:x16}", value_);
+  }
+
+ private:
+  uint64_t value_ = 0;
+};
+
+// Computes a hash code for the provided value, incorporating the provided seed.
+//
+// The seed doesn't need to be of any particularly high quality, but a zero seed
+// has bad effects in several places. Prefer the unseeded routine rather than
+// providing a zero here.
+//
+// This is **not** a cryptographically secure or stable hash -- it is only designed
+// for use with in-memory hash table style data structures. Being fast and
+// effective for that use case is the guiding principle of its design.
+//
+// There is no guarantee that the values produced are stable from execution to
+// execution. For speed and quality reasons, the implementation does not
+// introduce any variance to defend against accidental dependencies. As a
+// consequence, it is strongly encouraged to use a seed that varies from
+// execution to execution to avoid depending on specific values produced.
+//
+// The algorithm used is most heavily based on [Abseil's hashing algorithm][1],
+// with some additional ideas and inspiration from the fallback hashing
+// algorithm in [Rust's AHash][2] and the [FxHash][3] function. However, there
+// are also *significant* changes introduced here.
+//
+// [1]: https://github.com/abseil/abseil-cpp/tree/master/absl/hash/internal
+// [2]: https://github.com/tkaitchuck/aHash/wiki/AHash-fallback-algorithm
+// [3]: https://docs.rs/fxhash/latest/fxhash/
+//
+// This hash algorithm does *not* defend against hash flooding. While it can be
+// viewed as "keyed" on the seed, it is expected to be possible to craft inputs
+// for some data types that cancel out the seed used and manufacture endlessly
+// colliding sets of keys. In general, this function works to be *fast* for hash
+// tables. If you need to defend against hash flooding, either directly use a
+// data structure with strong worst-case guarantees, or a hash table which
+// detects catastrophic collisions and falls back to such a data structure.
+//
+// This hash function is heavily optimized for *latency* over *quality*. Modern
+// hash table designs can efficiently handle reasonable collision rates,
+// including using extra bits from the hash so that not all efficiency depends
+// on the same low bits. Because of this, low latency is significantly more
+// important for performance than high quality, and this is heavily leveraged.
+// The result is that the hash codes produced *do* have significant avalanche
+// problems for small keys. The upside is that the latency for hashing integers,
+// pointers, and small byte strings (up to 32-bytes) is exceptionally low, and
+// essentially a small constant time instruction sequence.
+//
+// No exotic instruction set extensions are required, and the state used is
+// small. It does rely on being able to get the low- and high-64-bit results of
+// a 64-bit multiply efficiently.
+//
+// The function supports many typical data types such as primitives, string-ish
+// views, and types composing primitives transparently like pairs, tuples, and
+// array-ish views. It is also extensible to support user-defined types.
+//
+// The builtin support for string-like types include:
+// - `std::string_view`
+// - `std::string`
+// - `llvm::StringRef`
+// - `llvm::SmallString`
+//
+// This function supports heterogeneous lookup between all of the string-like
+// types. It also supports heterogeneous lookup between pointer types regardless
+// of pointee type and `nullptr`.
+//
+// However, these are the only forms of heterogeneous lookup supported,
+// including for the builtin, standard, and LLVM types. Notably, integer types
+// of different sizes and signedness may hash differently for efficiency
+// reasons. Hash tables should pick a single integer type in which to manage
+// keys and do lookups.
+//
+// To add support for your type, you need to implement a customization point --
+// a free function that can be found by ADL for your type -- called
+// `CarbonHashValue` with the following signature:
+//
+// ```cpp
+// auto CarbonHashValue(const YourType& value, uint64_t seed) -> HashCode;
+// ```
+//
+// The extension point needs to ensure that values that compare equal (including
+// any comparisons with different types that might be used with a hash table of
+// `YourType` keys) produce the same `HashCode` values.
+//
+// `HashCode` values should typically be produced using the `Hasher` helper type
+// below. See its documentation for more details about implementing these
+// customization points and how best to incorporate the value's state into a
+// `HashCode`.
+//
+// For two input values that are almost but not quite equal, the extension
+// point should maximize the probability of each bit of their resulting
+// `HashCode`s differing. More formally, `HashCode`s should exhibit an
+// [avalanche effect][4]. However, while this is desirable, it should be
+// **secondary** to low latency. The intended use case of these functions is not
+// cryptography but in-memory hashtables where the latency and overhead of
+// computing the `HashCode` is *significantly* more important than achieving a
+// particularly high quality. The goal is to have "just enough" avalanche
+// effect, but there is not a fixed criteria for how much is enough. That should
+// be determined through practical experimentation with a hashtable and
+// distribution of keys.
+//
+// [4]: https://en.wikipedia.org/wiki/Avalanche_effect
+template <typename T>
+inline auto HashValue(const T& value, uint64_t seed) -> HashCode;
+
+// The same as the seeded version of `HashValue` but without callers needing to
+// provide a seed.
+//
+// Generally prefer the seeded version, but this is available if there is no
+// reasonable seed. In particular, this will behave better than using a seed of
+// `0`. One important use case is for recursive hashing of sub-objects where
+// appropriate or needed.
+template <typename T>
+inline auto HashValue(const T& value) -> HashCode;
+
+// Object and APIs that eventually produce a hash code.
+//
+// This type is primarily used by types to implement a customization point
+// `CarbonHashValue` that will in turn be used by the `HashValue` function. See
+// the `HashValue` function for details of that extension point.
+//
+// The methods on this type can be used to incorporate data from your
+// user-defined type into its internal state which can be converted to a
+// `HashCode` at any time. These methods will only produce the same `HashCode`
+// if they are called in the exact same order with the same arguments -- there
+// are no guaranteed equivalences between calling different methods.
+//
+// Example usage:
+// ```cpp
+// auto CarbonHashValue(const MyType& value, uint64_t seed) -> HashCode {
+//   Hasher hasher(seed);
+//   hasher.Hash(value.x, value.y);
+//   return static_cast<HashCode>(hasher);
+// }
+// ```
+//
+// This type's API also reflects the reality that high-performance hash tables
+// are used with keys that are generally small and cheap to hash.
+//
+// To ensure this type's code is optimized effectively, it should typically be
+// used as a local variable and not passed across function boundaries
+// unnecessarily.
+//
+// The type also provides a number of static helper functions and static data
+// members that may be used by authors of `CarbonHashValue` implementations to
+// efficiently compute the inputs to the core `Hasher` methods, or even to
+// manually do some amounts of hashing in performance-tuned ways outside of the
+// methods provided.
+class Hasher {
+ public:
+  Hasher() = default;
+  explicit Hasher(uint64_t seed) : buffer(seed) {}
+
+  Hasher(Hasher&& arg) = default;
+  Hasher(const Hasher& arg) = delete;
+  auto operator=(Hasher&& rhs) -> Hasher& = default;
+
+  // Extracts the current state as a `HashCode` for use.
+  explicit operator HashCode() const { return HashCode(buffer); }
+
+  // Incorporates an object into the hasher's state by hashing its object
+  // representation. Requires `value`'s type to have a unique object
+  // representation. This is primarily useful for builtin and primitive types.
+  //
+  // This can be directly used for simple users combining some aggregation of
+  // objects. However, when possible, prefer the variadic version below for
+  // aggregating several primitive types into a hash.
+  template <typename T, typename = std::enable_if_t<
+                            std::has_unique_object_representations_v<T>>>
+  auto Hash(const T& value) -> void;
+
+  // Incorporates a variable number of objects into the hasher's state in a
+  // similar manner to applying the above function to each one in series. It
+  // has the same requirements as the above function for each `value`.
+  //
+  // There is no guaranteed correspondence between the behavior of a single call
+  // with multiple parameters and multiple calls. This routine is also optimized
+  // for handling relatively small numbers of objects. For hashing large
+  // aggregations, consider some Merkle-tree decomposition or arranging for a
+  // byte buffer that can be hashed as a single buffer. However, hashing large
+  // aggregations of data in this way rarely results in effective,
+  // high-performance hash table data structures and so should generally be
+  // avoided.
+  template <typename... Ts,
+            typename = std::enable_if_t<
+                (... && std::has_unique_object_representations_v<Ts>)>>
+  auto Hash(const Ts&... value) -> void;
+
+  // Simpler and more primitive functions to incorporate state represented in
+  // `uint64_t` values into the hasher's state.
+  //
+  // These may be slightly less efficient than the `Hash` method above for a
+  // typical application code `uint64_t`, but are designed to work well even
+  // when relevant data has been packed into the `uint64_t` parameters densely.
+  auto HashDense(uint64_t data) -> void;
+  auto HashDense(uint64_t data0, uint64_t data1) -> void;
+
+  // A heavily optimized routine for incorporating a dynamically sized sequence
+  // of bytes into the hasher's state.
+  //
+  // This routine has carefully structured inline code paths for short byte
+  // sequences and a reasonably high bandwidth code path for longer sequences.
+  // The size of the byte sequence is always incorporated into the hasher's
+  // state along with the contents.
+  auto HashSizedBytes(llvm::ArrayRef<std::byte> bytes) -> void;
+
+  // An out-of-line, throughput-optimized routine for incorporating a
+  // dynamically sized sequence when the sequence size is guaranteed to be >32.
+  // The size is always incorporated into the state.
+  auto HashSizedBytesLarge(llvm::ArrayRef<std::byte> bytes) -> void;
+
+  // Utility functions to read data of various sizes efficiently into a
+  // 64-bit value. These pointers need not be aligned, and can alias other
+  // objects. The representation of the read data in the `uint64_t` returned is
+  // not stable or guaranteed.
+  static auto Read1(const std::byte* data) -> uint64_t;
+  static auto Read2(const std::byte* data) -> uint64_t;
+  static auto Read4(const std::byte* data) -> uint64_t;
+  static auto Read8(const std::byte* data) -> uint64_t;
+
+  // Similar to the `ReadN` functions, but supports reading a range of byte
+  // counts given by the size *without branching on the size*. The lack of
+  // branches is often key, and the code in these routines works to be efficient
+  // in extracting a *dynamic* size of bytes into the returned `uint64_t`. There
+  // may be overlap between different routines, because these routines are based
+  // on different implementation techniques that do have some overlap in the
+  // range of sizes they can support. Which routine is the most efficient for a
+  // size in the overlap isn't trivial, and so these primitives are provided
+  // as-is and should be selected based on the localized generated code and
+  // benchmarked performance.
+  static auto Read1To3(const std::byte* data, ssize_t size) -> uint64_t;
+  static auto Read4To8(const std::byte* data, ssize_t size) -> uint64_t;
+  static auto Read8To16(const std::byte* data, ssize_t size)
+      -> std::pair<uint64_t, uint64_t>;
+
+  // Reads the underlying object representation of a type into a 64-bit integer
+  // efficiently. Only supports types with unique object representation and at
+  // most 8-bytes large. This is typically used to read primitive types.
+  template <typename T,
+            typename = std::enable_if_t<
+                std::has_unique_object_representations_v<T> && sizeof(T) <= 8>>
+  static auto ReadSmall(const T& value) -> uint64_t;
+
+  // The core of the hash algorithm is this mix function. The specific
+  // operations are not guaranteed to be stable but are described here for
+  // hashing authors to understand what to expect.
+  //
+  // Currently, this uses the same "mix" operation as in Abseil, AHash, and
+  // several other hashing algorithms. It takes two 64-bit integers, and
+  // multiplies them, capturing both the high 64-bit result and the low 64-bit
+  // result, and then XOR-ing those two halves together.
+  //
+  // A consequence of this operation is that a zero on either side will fail to
+  // incorporate any bits from the other side. Often, this is an acceptable rate
+  // of collision in practice. But it is worth being aware of and working to
+  // avoid common paths encountering this. For example, naively used this might
+  // cause different length all-zero byte strings to hash the same, essentially
+  // losing the length in the composition of the hash for a likely important
+  // case of byte sequence.
+  //
+  // Another consequence of the particular implementation is that it is useful
+  // to have a reasonable distribution of bits throughout both sides of the
+  // multiplication. However, it is not *necessary* as we do capture the
+  // complete 128-bit result. Where reasonable, the caller should XOR random
+  // data into operands before calling `Mix` to try and increase the
+  // distribution of bits feeding the multiply.
+  static auto Mix(uint64_t lhs, uint64_t rhs) -> uint64_t;
+
+  // An alternative to `Mix` that is significantly weaker but also lower
+  // latency. It should not be used when the input `uint64_t` is densely packed
+  // with data, but is a good option for hashing a single integer or pointer
+  // where the full 64-bits are sparsely populated and especially the high bits
+  // are often invariant between interestingly different values.
+  //
+  // This uses just the low 64-bit result of a multiply. It ensures the operand
+  // is good at diffusing bits, but inherently the high bits of the input will
+  // be (significantly) less often represented in the output. It also does some
+  // reversal to ensure the *low* bits of the result are the most useful ones.
+  static auto WeakMix(uint64_t value) -> uint64_t;
+
+  // We have a 64-byte random data pool designed to fit on a single cache line.
+  // This routine allows sampling it at byte offsets, which yields 64 - 8 = 56
+  // distinct random 64-bit results. The offset must be in the range [0, 56).
+  static auto SampleRandomData(ssize_t offset) -> uint64_t {
+    CARBON_DCHECK(offset + sizeof(uint64_t) < sizeof(StaticRandomData));
+    uint64_t data;
+    memcpy(&data,
+           reinterpret_cast<const unsigned char*>(&StaticRandomData) + offset,
+           sizeof(data));
+    return data;
+  }
+
+  // Random data taken from the hexadecimal digits of Pi's fractional component,
+  // written in lexical order for convenience of reading. The resulting
+  // byte-stream will be different due to little-endian integers. These can be
+  // used directly for convenience rather than calling `SampleRandomData`, but
+  // be aware that this is the underlying pool. The goal is to reuse the same
+  // single cache-line of constant data.
+  //
+  // The initializers here can be generated with the following shell script,
+  // which will generate 8 64-bit values and one more digit. The `bc` command's
+  // decimal based scaling means that without getting at least some extra hex
+  // digits rendered there will be rounding that we don't want so the script
+  // below goes on to produce one more hex digit, ensuring that the 8 initializers
+  // aren't rounded in any way. Using a higher scale won't cause the 8
+  // initializers here to change further.
+  //
+  // ```sh
+  // echo 'obase=16; scale=155; 4*a(1)' | env BC_LINE_LENGTH=500 bc -l \
+  //  | cut -c 3- | tr '[:upper:]' '[:lower:]' \
+  //  | sed -e "s/.\{4\}/&'/g" \
+  //  | sed -e "s/\(.\{4\}'.\{4\}'.\{4\}'.\{4\}\)'/0x\1,\n/g"
+  // ```
+  static inline constexpr std::array<uint64_t, 8> StaticRandomData = {
+      0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0,
+      0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377, 0xbe54'66cf'34e9'0c6c,
+      0xc0ac'29b7'c97c'50dd, 0x3f84'd5b5'b547'0917,
+  };
+
+  // The multiplicative hash constant from Knuth, derived from 2^64 / Phi. For
+  // details on its selection, see:
+  // https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+  // https://book.huihoo.com/data-structures-and-algorithms-with-object-oriented-design-patterns-in-c++/html/page214.html
+  static constexpr uint64_t MulConstant = 0x9e37'79b9'7f4a'7c15U;
+
+ private:
+  uint64_t buffer;
+};
+
+// A dedicated namespace for `CarbonHashValue` overloads that are not found by
+// ADL with their associated types. For example, primitive type overloads or
+// overloads for types in LLVM's libraries.
+namespace HashDispatch {
+
+inline auto CarbonHashValue(llvm::ArrayRef<std::byte> bytes, uint64_t seed)
+    -> HashCode {
+  Hasher hasher(seed);
+  hasher.HashSizedBytes(bytes);
+  return static_cast<HashCode>(hasher);
+}
+
+// Hashing implementation for `llvm::StringRef`. We forward all the other
+// string-like types that support heterogeneous lookup to this one.
+inline auto CarbonHashValue(llvm::StringRef value, uint64_t seed) -> HashCode {
+  return CarbonHashValue(
+      llvm::ArrayRef(reinterpret_cast<const std::byte*>(value.data()),
+                     value.size()),
+      seed);
+}
+
+inline auto CarbonHashValue(std::string_view value, uint64_t seed) -> HashCode {
+  return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
+}
+
+inline auto CarbonHashValue(const std::string& value, uint64_t seed)
+    -> HashCode {
+  return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
+}
+
+template <unsigned Length>
+inline auto CarbonHashValue(const llvm::SmallString<Length>& value,
+                            uint64_t seed) -> HashCode {
+  return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
+}
+
+// C++ guarantees this is true for the unsigned variants, but we require it for
+// signed variants and pointers.
+static_assert(std::has_unique_object_representations_v<int8_t>);
+static_assert(std::has_unique_object_representations_v<int16_t>);
+static_assert(std::has_unique_object_representations_v<int32_t>);
+static_assert(std::has_unique_object_representations_v<int64_t>);
+static_assert(std::has_unique_object_representations_v<void*>);
+
+// C++ uses `std::nullptr_t` but unfortunately doesn't make it have a unique
+// object representation. To address that, we need a function that converts
+// `nullptr` back into a `void*` that will have a unique object representation.
+// And this needs to be done by-value as we need to build a temporary object to
+// return, which requires a separate overload rather than just using a type
+// function that could be used in parallel in the predicate below. Instead, we
+// build the predicate independently of the mapping overload, but together they
+// should produce the correct result.
+template <typename T>
+inline auto MapNullPtrToVoidPtr(const T& value) -> const T& {
+  // This overload should never be selected for `std::nullptr_t`, so
+  // static_assert to get some better compiler error messages.
+  static_assert(!std::is_same_v<T, std::nullptr_t>);
+  return value;
+}
+inline auto MapNullPtrToVoidPtr(std::nullptr_t /*value*/) -> const void* {
+  return nullptr;
+}
+
+// Predicate to be used in conjunction with a `nullptr` mapping routine like the
+// above.
+template <typename T>
+constexpr bool NullPtrOrHasUniqueObjectRepresentations =
+    std::is_same_v<T, std::nullptr_t> ||
+    std::has_unique_object_representations_v<T>;
+
+template <typename T, typename = std::enable_if_t<
+                          NullPtrOrHasUniqueObjectRepresentations<T>>>
+inline auto CarbonHashValue(const T& value, uint64_t seed) -> HashCode {
+  Hasher hasher(seed);
+  hasher.Hash(MapNullPtrToVoidPtr(value));
+  return static_cast<HashCode>(hasher);
+}
+
+template <typename... Ts,
+          typename = std::enable_if_t<
+              (... && NullPtrOrHasUniqueObjectRepresentations<Ts>)>>
+inline auto CarbonHashValue(const std::tuple<Ts...>& value, uint64_t seed)
+    -> HashCode {
+  Hasher hasher(seed);
+  std::apply(
+      [&](const auto&... args) { hasher.Hash(MapNullPtrToVoidPtr(args)...); },
+      value);
+  return static_cast<HashCode>(hasher);
+}
+
+template <typename T, typename U,
+          typename = std::enable_if_t<
+              NullPtrOrHasUniqueObjectRepresentations<T> &&
+              NullPtrOrHasUniqueObjectRepresentations<U> &&
+              sizeof(T) <= sizeof(uint64_t) && sizeof(U) <= sizeof(uint64_t)>>
+inline auto CarbonHashValue(const std::pair<T, U>& value, uint64_t seed)
+    -> HashCode {
+  return CarbonHashValue(std::tuple(value.first, value.second), seed);
+}
+
+template <typename T, typename = std::enable_if_t<
+                          std::has_unique_object_representations_v<T>>>
+inline auto CarbonHashValue(llvm::ArrayRef<T> objs, uint64_t seed) -> HashCode {
+  return CarbonHashValue(
+      llvm::ArrayRef(reinterpret_cast<const std::byte*>(objs.data()),
+                     objs.size() * sizeof(T)),
+      seed);
+}
+
+template <typename T>
+inline auto DispatchImpl(const T& value, uint64_t seed) -> HashCode {
+  // This unqualified call will find both the overloads in this namespace and
+  // ADL-found functions in an associated namespace of `T`.
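+  // User-defined types typically opt in by providing a hidden-friend
+  // `CarbonHashValue(const T& value, uint64_t seed) -> HashCode` overload that
+  // builds a `Hasher` from the seed, hashes the salient fields, and converts
+  // the hasher to a `HashCode`.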
+  return CarbonHashValue(value, seed);
+}
+
+}  // namespace HashDispatch
+
+template <typename T>
+inline auto HashValue(const T& value, uint64_t seed) -> HashCode {
+  return HashDispatch::DispatchImpl(value, seed);
+}
+
+template <typename T>
+inline auto HashValue(const T& value) -> HashCode {
+  // When a seed isn't provided, use the last 64-bit chunk of random data. Other
+  // chunks (especially the first) are more often XOR-ed with the seed and risk
+  // cancelling each other out and feeding a zero to a `Mix` call in a way that
+  // sharply increases collisions.
+  return HashValue(value, Hasher::StaticRandomData[7]);
+}
+
+inline constexpr auto HashCode::ExtractIndex(ssize_t size) -> ssize_t {
+  CARBON_DCHECK(llvm::isPowerOf2_64(size));
+  return value_ & (size - 1);
+}
+
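+// As an illustrative example: with `N == 7` and `size == 1024`, the tag comes
+// from the low 7 bits of the hash and the index from the next 10 bits.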
+template <int N>
+inline constexpr auto HashCode::ExtractIndexAndTag(ssize_t size)
+    -> std::pair<ssize_t, uint32_t> {
+  static_assert(N >= 1);
+  static_assert(N <= 32);
+  CARBON_DCHECK(llvm::isPowerOf2_64(size));
+  CARBON_DCHECK(1LL << (64 - N) >= size) << "Not enough bits for size and tag!";
+  return {static_cast<ssize_t>((value_ >> N) & (size - 1)),
+          static_cast<uint32_t>(value_ & ((1ULL << N) - 1))};
+}
+
+// Building with `-DCARBON_MCA_MARKERS` will enable `llvm-mca` annotations in
+// the source code. These can interfere with optimization, but allows analyzing
+// the generated `.s` file with the `llvm-mca` tool. Documentation for these
+// markers is here:
+// https://llvm.org/docs/CommandGuide/llvm-mca.html#using-markers-to-analyze-specific-code-blocks
+#if CARBON_MCA_MARKERS
+#define CARBON_MCA_BEGIN(NAME) \
+  __asm volatile("# LLVM-MCA-BEGIN " NAME "" ::: "memory");
+#define CARBON_MCA_END(NAME) \
+  __asm volatile("# LLVM-MCA-END " NAME "" ::: "memory");
+#else
+#define CARBON_MCA_BEGIN(NAME)
+#define CARBON_MCA_END(NAME)
+#endif
+
+inline auto Hasher::Read1(const std::byte* data) -> uint64_t {
+  uint8_t result;
+  std::memcpy(&result, data, sizeof(result));
+  return result;
+}
+
+inline auto Hasher::Read2(const std::byte* data) -> uint64_t {
+  uint16_t result;
+  std::memcpy(&result, data, sizeof(result));
+  return result;
+}
+
+inline auto Hasher::Read4(const std::byte* data) -> uint64_t {
+  uint32_t result;
+  std::memcpy(&result, data, sizeof(result));
+  return result;
+}
+
+inline auto Hasher::Read8(const std::byte* data) -> uint64_t {
+  uint64_t result;
+  std::memcpy(&result, data, sizeof(result));
+  return result;
+}
+
+inline auto Hasher::Read1To3(const std::byte* data, ssize_t size) -> uint64_t {
+  // Use carefully crafted indexing to avoid branches on the exact size while
+  // reading.
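+  // For example: with `size == 1` all three reads hit `data[0]`; with
+  // `size == 2` they read `data[0]`, `data[1]`, `data[1]`; and with
+  // `size == 3` they read `data[0]`, `data[2]`, `data[1]`. Every size stays in
+  // bounds without a branch.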
+  uint64_t byte0 = static_cast<uint8_t>(data[0]);
+  uint64_t byte1 = static_cast<uint8_t>(data[size - 1]);
+  uint64_t byte2 = static_cast<uint8_t>(data[size >> 1]);
+  return byte0 | (byte1 << 16) | (byte2 << 8);
+}
+
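+// Read the first and last four bytes of a 4-8 byte range into a single 64-bit
+// value. The two reads overlap for sizes below 8 (and fully coincide for size
+// 4); callers such as `HashSizedBytes` fold the size in separately, so the
+// overlap doesn't trivially collide inputs of different sizes.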
+inline auto Hasher::Read4To8(const std::byte* data, ssize_t size) -> uint64_t {
+  uint32_t low;
+  std::memcpy(&low, data, sizeof(low));
+  uint32_t high;
+  std::memcpy(&high, data + size - sizeof(high), sizeof(high));
+  return low | (static_cast<uint64_t>(high) << 32);
+}
+
+inline auto Hasher::Read8To16(const std::byte* data, ssize_t size)
+    -> std::pair<uint64_t, uint64_t> {
+  uint64_t low;
+  std::memcpy(&low, data, sizeof(low));
+  uint64_t high;
+  std::memcpy(&high, data + size - sizeof(high), sizeof(high));
+  return {low, high};
+}
+
+inline auto Hasher::Mix(uint64_t lhs, uint64_t rhs) -> uint64_t {
+  // Use the C23 extended integer support that Clang provides as a general
+  // language extension.
+  using U128 = unsigned _BitInt(128);
+  U128 result = static_cast<U128>(lhs) * static_cast<U128>(rhs);
+  return static_cast<uint64_t>(result) ^ static_cast<uint64_t>(result >> 64);
+}
+
+inline auto Hasher::WeakMix(uint64_t value) -> uint64_t {
+  value *= MulConstant;
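+  // A multiply mixes its operand's bits most thoroughly into the high bits of
+  // the result, while consumers such as `HashCode::ExtractIndex` read the low
+  // bits, so move the well-mixed high bits down.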
+#ifdef __ARM_ACLE
+  // Arm has a fast bit-reversal that gives us the optimal distribution.
+  value = __rbitll(value);
+#else
+  // Otherwise, assume an optimized BSWAP such as x86's. That's close enough.
+  value = __builtin_bswap64(value);
+#endif
+  return value;
+}
+
+inline auto Hasher::HashDense(uint64_t data) -> void {
+  // When hashing exactly one 64-bit entity use the Phi-derived constant as this
+  // is just multiplicative hashing. The initial buffer is mixed on input to
+  // pipeline with materializing the constant.
+  buffer = Mix(data ^ buffer, MulConstant);
+}
+
+inline auto Hasher::HashDense(uint64_t data0, uint64_t data1) -> void {
+  // When hashing two chunks of data at the same time, we XOR them with random
+  // data to keep common inputs from having especially bad multiplicative
+  // effects. We also XOR in the starting buffer as seed or to chain. Note that
+  // we don't use *consecutive* random data 64-bit values to avoid a common
+  // compiler "optimization" of loading both 64-bit chunks into a 128-bit vector
+  // and doing the XOR in the vector unit. The latency of extracting the data
+  // afterward eclipses any benefit. Callers will routinely pass two consecutive
+  // data values here, but using non-consecutive random keys keeps vectorization
+  // from being tempting.
+  //
+  // XOR-ing both the incoming state and a random word over the second data is
+  // done to pipeline with materializing the constants and is observed to have
+  // better performance than XOR-ing after the mix.
+  //
+  // This roughly matches the mix pattern used in the larger mixing routines
+  // from Abseil, which is a more minimal form than used in other algorithms
+  // such as AHash and seems adequate for latency-optimized use cases.
+  buffer =
+      Mix(data0 ^ StaticRandomData[1], data1 ^ StaticRandomData[3] ^ buffer);
+}
+
+template <typename T, typename /*enable_if*/>
+inline auto Hasher::ReadSmall(const T& value) -> uint64_t {
+  const auto* storage = reinterpret_cast<const std::byte*>(&value);
+  if constexpr (sizeof(T) == 1) {
+    return Read1(storage);
+  } else if constexpr (sizeof(T) == 2) {
+    return Read2(storage);
+  } else if constexpr (sizeof(T) == 3) {
+    return Read2(storage) | (Read1(&storage[2]) << 16);
+  } else if constexpr (sizeof(T) == 4) {
+    return Read4(storage);
+  } else if constexpr (sizeof(T) == 5) {
+    return Read4(storage) | (Read1(&storage[4]) << 32);
+  } else if constexpr (sizeof(T) == 6 || sizeof(T) == 7) {
+    // Use overlapping 4-byte reads for 6 and 7 bytes.
+    return Read4(storage) | (Read4(&storage[sizeof(T) - 4]) << 32);
+  } else if constexpr (sizeof(T) == 8) {
+    return Read8(storage);
+  } else {
+    static_assert(sizeof(T) <= 8);
+  }
+}
+
+template <typename T, typename /*enable_if*/>
+inline auto Hasher::Hash(const T& value) -> void {
+  if constexpr (sizeof(T) <= 8) {
+    // For types 8 bytes and smaller that are hashed directly (as opposed to an
+    // 8-byte chunk of a larger, potentially bit-packed input), we rarely expect
+    // the incoming data to fully and densely populate all 8 bytes. For these
+    // cases we have a `WeakMix` routine that is lower latency but lower
+    // quality.
+    CARBON_MCA_BEGIN("fixed-8b");
+    buffer = WeakMix(ReadSmall(value));
+    CARBON_MCA_END("fixed-8b");
+    return;
+  }
+
+  const auto* data_ptr = reinterpret_cast<const std::byte*>(&value);
+  if constexpr (8 < sizeof(T) && sizeof(T) <= 16) {
+    CARBON_MCA_BEGIN("fixed-16b");
+    auto values = Read8To16(data_ptr, sizeof(T));
+    HashDense(values.first, values.second);
+    CARBON_MCA_END("fixed-16b");
+    return;
+  }
+
+  if constexpr (16 < sizeof(T) && sizeof(T) <= 32) {
+    CARBON_MCA_BEGIN("fixed-32b");
+    // Essentially the same technique used for dynamically sized byte sequences
+    // of this size, but we start with a fixed XOR of random data.
+    buffer ^= StaticRandomData[0];
+    uint64_t m0 = Mix(Read8(data_ptr) ^ StaticRandomData[1],
+                      Read8(data_ptr + 8) ^ buffer);
+    const std::byte* tail_16b_ptr = data_ptr + (sizeof(T) - 16);
+    uint64_t m1 = Mix(Read8(tail_16b_ptr) ^ StaticRandomData[3],
+                      Read8(tail_16b_ptr + 8) ^ buffer);
+    buffer = m0 ^ m1;
+    CARBON_MCA_END("fixed-32b");
+    return;
+  }
+
+  // Hashing the size isn't relevant here, but is harmless, so fall back to a
+  // common code path.
+  HashSizedBytesLarge(llvm::ArrayRef<std::byte>(data_ptr, sizeof(T)));
+}
+
+template <typename... Ts, typename /*enable_if*/>
+inline auto Hasher::Hash(const Ts&... value) -> void {
+  if constexpr (sizeof...(Ts) == 0) {
+    buffer ^= StaticRandomData[0];
+    return;
+  }
+  if constexpr (sizeof...(Ts) == 1) {
+    Hash(value...);
+    return;
+  }
+  if constexpr ((... && (sizeof(Ts) <= 8))) {
+    if constexpr (sizeof...(Ts) == 2) {
+      HashDense(ReadSmall(value)...);
+      return;
+    }
+
+    // More than two, but all small -- read each one into a contiguous buffer of
+    // data. This may be a bit memory wasteful by padding everything out to
+    // 8-byte chunks, but for that regularity the hashing is likely faster.
+    const uint64_t data[] = {ReadSmall(value)...};
+    Hash(data);
+    return;
+  }
+
+  // For larger objects, hash each one down to a hash code and then hash those
+  // as a buffer.
+  const uint64_t data[] = {static_cast<uint64_t>(HashValue(value))...};
+  Hash(data);
+}
+
+inline auto Hasher::HashSizedBytes(llvm::ArrayRef<std::byte> bytes) -> void {
+  const std::byte* data_ptr = bytes.data();
+  const ssize_t size = bytes.size();
+
+  // First handle short sequences under 8 bytes. We distribute the branches a
+  // bit for short strings.
+  if (size <= 8) {
+    if (size >= 4) {
+      CARBON_MCA_BEGIN("dynamic-8b");
+      uint64_t data = Read4To8(data_ptr, size);
+      // We optimize for latency on short strings by hashing both the data and
+      // size in a single multiply here, using the small nature of size to
+      // sample a specific sequence of bytes with well distributed bits into one
+      // side of the multiply. This results in a *statistically* weak hash
+      // function, but one with very low latency.
+      //
+      // Note that we don't drop to the `WeakMix` routine here because we want
+      // to use sampled random data to encode the size, which may not be as
+      // effective without the full 128-bit folded result.
+      buffer = Mix(data ^ buffer, SampleRandomData(size));
+      CARBON_MCA_END("dynamic-8b");
+      return;
+    }
+
+    // When we only have 0-3 bytes of string, we can avoid the cost of `Mix`.
+    // Instead, for empty strings we can just XOR some of our data against the
+    // existing buffer. For 1-3 byte lengths we do 3 one-byte reads adjusted to
+    // always read in-bounds without branching. Then we OR the size into the 4th
+    // byte and use `WeakMix`.
+    CARBON_MCA_BEGIN("dynamic-4b");
+    if (size == 0) {
+      buffer ^= StaticRandomData[0];
+    } else {
+      uint64_t data = Read1To3(data_ptr, size) | size << 24;
+      buffer = WeakMix(data);
+    }
+    CARBON_MCA_END("dynamic-4b");
+    return;
+  }
+
+  if (size <= 16) {
+    CARBON_MCA_BEGIN("dynamic-16b");
+    // Similar to the above, we optimize primarily for latency here and spread
+    // the incoming data across both ends of the multiply. Note that this does
+    // have a drawback -- any time one half of the mix function becomes zero it
+    // will fail to incorporate any bits from the other half. However, only one
+    // in 2^64 values for each side achieves this, and only when
+    // the size is exactly 16 -- for smaller sizes there is an overlapping byte
+    // that makes this impossible unless the seed is *also* incredibly unlucky.
+    //
+    // Because this hash function makes no attempt to defend against hash
+    // flooding, we accept this risk in order to keep the latency low. If this
+    // becomes a problem outside of flooding attacks, we can restrict this path
+    // to sizes <16 and send the 16-byte case down the next tier of cost.
+    uint64_t size_hash = SampleRandomData(size);
+    auto data = Read8To16(data_ptr, size);
+    buffer = Mix(data.first ^ size_hash, data.second ^ buffer);
+    CARBON_MCA_END("dynamic-16b");
+    return;
+  }
+
+  if (size <= 32) {
+    CARBON_MCA_BEGIN("dynamic-32b");
+    // Do two mixes of overlapping 16-byte ranges in parallel to minimize
+    // latency. We also incorporate the size by sampling random data into the
+    // seed before both.
+    buffer ^= SampleRandomData(size);
+    uint64_t m0 = Mix(Read8(data_ptr) ^ StaticRandomData[1],
+                      Read8(data_ptr + 8) ^ buffer);
+
+    const std::byte* tail_16b_ptr = data_ptr + (size - 16);
+    uint64_t m1 = Mix(Read8(tail_16b_ptr) ^ StaticRandomData[3],
+                      Read8(tail_16b_ptr + 8) ^ buffer);
+    // Just an XOR mix at the end is quite weak here, but we prefer that for
+    // latency over a more robust approach. Doing another mix with the size (the
+    // way longer string hashing does) increases the latency on x86-64
+    // significantly (approx. 20%).
+    buffer = m0 ^ m1;
+    CARBON_MCA_END("dynamic-32b");
+    return;
+  }
+
+  HashSizedBytesLarge(bytes);
+}
+
+}  // namespace Carbon
+
+#endif  // CARBON_COMMON_HASHING_H_

+ 301 - 0
common/hashing_benchmark.cpp

@@ -0,0 +1,301 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <benchmark/benchmark.h>
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstring>
+#include <utility>
+
+#include "absl/hash/hash.h"
+#include "absl/random/random.h"
+#include "common/hashing.h"
+#include "llvm/ADT/Hashing.h"
+
+namespace Carbon {
+namespace {
+
+// We want the benchmark working set to fit in the L1 cache where possible so
+// that the benchmark focuses on the CPU-execution costs and not memory latency.
+// For most CPUs we're going to care about, 16k will fit easily, and 32k will
+// probably fit. But we also need to include sizes for string benchmarks. This
+// targets 8k of entropy with each object up to 8k of size for a total of 16k.
+constexpr int EntropySize = 8 << 10;
+constexpr int EntropyObjSize = 8 << 10;
+
+// An array of random entropy with `EntropySize` bytes plus `EntropyObjSize`
+// bytes of padding. The goal is that clients can start at any of `EntropySize`
+// byte offsets and read an object of up to `EntropyObjSize` bytes out of this
+// pool.
+static const llvm::ArrayRef<std::byte> entropy_bytes =
+    []() -> llvm::ArrayRef<std::byte> {
+  static llvm::SmallVector<std::byte> bytes;
+  // Pad out the entropy for objects of up to `EntropyObjSize` bytes.
+  bytes.resize(EntropySize + EntropyObjSize);
+  absl::BitGen gen;
+  for (std::byte& b : bytes) {
+    b = static_cast<std::byte>(absl::Uniform<uint8_t>(gen));
+  }
+  return bytes;
+}();
+
+// Based on the 16k of entropy above and an L1 cache size often up to 32k, keep
+// each array of sizes small: 1k 8-byte sizes, or 8k in total.
+constexpr int NumSizes = 1 << 10;
+
+// Selects an array of `NumSizes` sizes, with each one in the range [0,
+// MaxSize). The sizes will be in a random order, but the sum of sizes will
+// always be the same.
+template <size_t MaxSize>
+static const std::array<size_t, NumSizes> rand_sizes = []() {
+  std::array<size_t, NumSizes> sizes;
+  // Build an array with a deterministic set of sizes in the
+  // range [0, MaxSize), using the golden ratio to select well distributed
+  // points in that range. See https://www.youtube.com/watch?v=lOIP_Z_-0Hs for
+  // an example of why this is an effective strategy for selecting sizes in the
+  // range.
+  static_assert(NumSizes > 128);
+  constexpr double Phi = 1.61803398875;
+  constexpr size_t Scale = std::max<size_t>(1, MaxSize / Phi);
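+  // As an illustrative example, `MaxSize == 1024` gives `Scale == 632`, and the
+  // resulting sequence 0, 632, 240, 872, 480, ... covers the range fairly
+  // evenly.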
+  for (auto [i, size] : llvm::enumerate(sizes)) {
+    size = (i * Scale) % MaxSize;
+  }
+  // Shuffle the sizes randomly so that there isn't any pattern of sizes
+  // encountered and we get relatively realistic branch prediction behavior
+  // when branching on the size. We use this approach rather than random
+  // sizes to ensure we always have the same total size of data processed.
+  std::shuffle(sizes.begin(), sizes.end(), absl::BitGen());
+  return sizes;
+}();
+
+// A small helper class to synthesize random values out of our entropy pool.
+// This is done in a way that depends on an arbitrary input (`x`) to allow us to
+// create a benchmark that measures a *dependent* chain of hashes of these
+// values.
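+// Because each `Get` call consumes the previous hash result as `x`, the CPU
+// cannot overlap successive hashes, so the benchmark measures latency rather
+// than throughput.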
+//
+// `T` needs to be default constructible and reasonable to synthesize an
+// instance by copying random bytes into its underlying storage.
+//
+// This helper class also accumulates the number of bytes of data generated in
+// order to let us compute throughput measurements as well as latency
+// measurements.
+//
+// This helper class has the same API as the `RandStrings` helpers below so that
+// they can all be used as type parameters to a common benchmark routine below.
+template <typename T>
+struct RandValues {
+  size_t bytes = 0;
+
+  // Get a random value. We don't need to iterate through sizes so `i` is
+  // ignored, but we use `x` to select our entropy ensuring a dependency on `x`
+  // for the benchmark.
+  auto Get(ssize_t /*i*/, uint64_t x) -> T {
+    static_assert(sizeof(T) <= EntropyObjSize);
+    bytes += sizeof(T);
+    T result;
+    memcpy(&result, &entropy_bytes[x % EntropySize], sizeof(T));
+    return result;
+  }
+};
+
+// A specialization to help with building pairs of values.
+template <typename T, typename U>
+struct RandValues<std::pair<T, U>> {
+  size_t bytes = 0;
+
+  auto Get(ssize_t /*i*/, uint64_t x) -> std::pair<T, U> {
+    static_assert(sizeof(std::pair<T, U>) <= EntropyObjSize);
+    bytes += sizeof(std::pair<T, U>);
+    T result0;
+    U result1;
+    memcpy(&result0, &entropy_bytes[x % EntropySize], sizeof(T));
+    memcpy(&result1, &entropy_bytes[x % EntropySize] + sizeof(T), sizeof(U));
+    return {result0, result1};
+  }
+};
+
+// A helper class similar to `RandValues`, but for building strings rather than
+// values. The string content is pulled from the entropy pool. The size can be
+// random from [0, MaxSize], or it can be fixed at `MaxSize`. But the `MaxSize`
+// cannot be larger than a single byte sequence pulled from the entropy pool
+// (`EntropyObjSize`).
+template <bool RandSize, size_t MaxSize>
+struct RandStrings {
+  size_t bytes = 0;
+
+  // Get a random string. If the sizes are random, we use `i` to select each
+  // size and require it to be in the range [0, NumSizes). Otherwise `i` is
+  // ignored. We always use `x` to select the entropy and establish a dependency
+  // on the input.
+  auto Get(ssize_t i, uint64_t x) -> llvm::StringRef {
+    static_assert(MaxSize <= EntropyObjSize);
+    size_t s = MaxSize;
+    if constexpr (RandSize) {
+      // When using random sizes, we leverage `i` which is guaranteed to range
+      // from [0, NumSizes).
+      s = rand_sizes<MaxSize>[i];
+    } else {
+      // Prevent `s` from being constant folded when we directly use `MaxSize`.
+      benchmark::DoNotOptimize(s);
+    }
+    bytes += s;
+    return llvm::StringRef(
+        reinterpret_cast<const char*>(&entropy_bytes[x % EntropySize]), s);
+  }
+};
+
+struct HashBenchBase {
+  uint64_t seed;
+
+  HashBenchBase() {
+    // The real-world use case we care about is in a hash table where we'll mix
+    // in some seed state, likely some ASLR address. To simulate this for
+    // benchmarking, compute a seed from the address of a stack local variable.
+    volatile char key;
+    key = 42;
+    // Rinse this through a volatile variable as well so returning it isn't
+    // flagged. The whole point is to escape the address of something on the
+    // stack.
+    volatile auto key_addr = reinterpret_cast<uint64_t>(&key);
+    seed = key_addr;
+  }
+};
+
+struct CarbonHashBench : HashBenchBase {
+  template <typename T>
+  auto operator()(const T& value) -> uint64_t {
+    return static_cast<uint64_t>(HashValue(value, seed));
+  }
+};
+
+struct AbseilHashBench : HashBenchBase {
+  template <typename T>
+  auto operator()(const T& value) -> uint64_t {
+    // Manually seed this with an after-the-fact XOR as there isn't a seeded
+    // version. This matches what Abseil's hash tables do as well.
+    return absl::HashOf(value) ^ seed;
+  }
+};
+
+struct LLVMHashBench : HashBenchBase {
+  template <typename T>
+  auto operator()(const T& value) -> uint64_t {
+    // Manually seed this with an after-the-fact XOR as there isn't a seeded
+    // version.
+    return llvm::hash_value(value) ^ seed;
+  }
+};
+
+template <typename Values, typename Hasher>
+void BM_LatencyHash(benchmark::State& state) {
+  uint64_t x = 13;
+  Values v;
+  Hasher h;
+  // We run the benchmark in batches of `NumSizes` iterations so that, when the
+  // sizes matter, we always process each of the sizes and don't randomly end up
+  // with a skewed subset of them.
+  while (state.KeepRunningBatch(NumSizes)) {
+    for (ssize_t i = 0; i < NumSizes; ++i) {
+      benchmark::DoNotOptimize(x = h(v.Get(i, x)));
+    }
+  }
+  state.SetBytesProcessed(v.bytes);
+}
+
+// Latency benchmarks are grouped by the three different hash functions to
+// facilitate comparing their performance for a given value type or string size
+// bucket.
+#define LATENCY_VALUE_BENCHMARKS(...)                                  \
+  BENCHMARK(BM_LatencyHash<RandValues<__VA_ARGS__>, CarbonHashBench>); \
+  BENCHMARK(BM_LatencyHash<RandValues<__VA_ARGS__>, AbseilHashBench>); \
+  BENCHMARK(BM_LatencyHash<RandValues<__VA_ARGS__>, LLVMHashBench>)
+LATENCY_VALUE_BENCHMARKS(uint8_t);
+LATENCY_VALUE_BENCHMARKS(uint16_t);
+LATENCY_VALUE_BENCHMARKS(std::pair<uint8_t, uint8_t>);
+LATENCY_VALUE_BENCHMARKS(uint32_t);
+LATENCY_VALUE_BENCHMARKS(std::pair<uint16_t, uint16_t>);
+LATENCY_VALUE_BENCHMARKS(uint64_t);
+LATENCY_VALUE_BENCHMARKS(int*);
+LATENCY_VALUE_BENCHMARKS(std::pair<uint32_t, uint32_t>);
+LATENCY_VALUE_BENCHMARKS(std::pair<uint64_t, uint32_t>);
+LATENCY_VALUE_BENCHMARKS(std::pair<uint32_t, uint64_t>);
+LATENCY_VALUE_BENCHMARKS(std::pair<int*, uint32_t>);
+LATENCY_VALUE_BENCHMARKS(std::pair<uint32_t, int*>);
+LATENCY_VALUE_BENCHMARKS(__uint128_t);
+LATENCY_VALUE_BENCHMARKS(std::pair<uint64_t, uint64_t>);
+LATENCY_VALUE_BENCHMARKS(std::pair<int*, int*>);
+LATENCY_VALUE_BENCHMARKS(std::pair<uint64_t, int*>);
+LATENCY_VALUE_BENCHMARKS(std::pair<int*, uint64_t>);
+
+#define LATENCY_STRING_BENCHMARKS(MaxSize)                          \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/true, MaxSize>, \
+                           CarbonHashBench>);                       \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/true, MaxSize>, \
+                           AbseilHashBench>);                       \
+  BENCHMARK(                                                        \
+      BM_LatencyHash<RandStrings</*RandSize=*/true, MaxSize>, LLVMHashBench>)
+
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/4);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/8);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/16);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/32);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/64);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/256);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/512);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/1024);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/2048);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/4096);
+LATENCY_STRING_BENCHMARKS(/*MaxSize=*/8192);
+
+// We also want to check for size-specific cliffs, particularly in small sizes
+// and sizes around implementation inflection points such as powers of two and
+// half-way points between powers of two. Because these benchmarks are looking
+// for size-related cliffs, all the runs for a particular hash function are kept
+// together.
+//
+// Note: because these use a fixed size, their specific timing isn't terribly
+// informative. The branch predictor behavior on a modern CPU will be
+// significantly different in these benchmarks than in any other, and may
+// distort the timings in all sorts of ways. The results should really only be
+// compared between
+// sizes for cliffs, and not directly compared with other numbers.
+#define LATENCY_STRING_SIZE_BENCHMARKS(Hash)                             \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 0>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 1>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 2>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 3>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 4>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 5>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 6>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 7>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 8>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 9>, Hash>);   \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 15>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 16>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 17>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 23>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 24>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 25>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 31>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 32>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 33>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 47>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 48>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 49>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 63>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 64>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 65>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 91>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 92>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 93>, Hash>);  \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 127>, Hash>); \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 128>, Hash>); \
+  BENCHMARK(BM_LatencyHash<RandStrings</*RandSize=*/false, 129>, Hash>)
+
+// Because these just look for size-related cliffs in performance, we only do a
+// minimal number of benchmarks. There are a lot of sizes, so this avoids
+// wasted time in benchmark runs, and there isn't much value in greater
+// comparative
+// coverage here.
+LATENCY_STRING_SIZE_BENCHMARKS(CarbonHashBench);
+LATENCY_STRING_SIZE_BENCHMARKS(AbseilHashBench);
+
+}  // namespace
+}  // namespace Carbon

+ 719 - 0
common/hashing_test.cpp

@@ -0,0 +1,719 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "common/hashing.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/TypeName.h"
+
+namespace Carbon {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Le;
+using ::testing::Ne;
+
+TEST(HashingTest, HashCodeAPI) {
+  // Manually compute a few hash codes where we can exercise the underlying API.
+  HashCode empty = HashValue("");
+  HashCode a = HashValue("a");
+  HashCode b = HashValue("b");
+  ASSERT_THAT(HashValue(""), Eq(empty));
+  ASSERT_THAT(HashValue("a"), Eq(a));
+  ASSERT_THAT(HashValue("b"), Eq(b));
+  ASSERT_THAT(empty, Ne(a));
+  ASSERT_THAT(empty, Ne(b));
+  ASSERT_THAT(a, Ne(b));
+
+  // Exercise the methods in basic ways across a few sizes. This doesn't check
+  // much beyond stability across re-computed values, and that nothing crashes
+  // or hits UB.
+  EXPECT_THAT(HashValue("a").ExtractIndex(2), Eq(a.ExtractIndex(2)));
+  EXPECT_THAT(HashValue("a").ExtractIndex(4), Eq(a.ExtractIndex(4)));
+  EXPECT_THAT(HashValue("a").ExtractIndex(8), Eq(a.ExtractIndex(8)));
+  EXPECT_THAT(HashValue("a").ExtractIndex(1 << 10),
+              Eq(a.ExtractIndex(1 << 10)));
+  EXPECT_THAT(HashValue("a").ExtractIndex(1 << 20),
+              Eq(a.ExtractIndex(1 << 20)));
+  EXPECT_THAT(HashValue("a").ExtractIndex(1 << 30),
+              Eq(a.ExtractIndex(1 << 30)));
+  EXPECT_THAT(HashValue("a").ExtractIndex(1LL << 40),
+              Eq(a.ExtractIndex(1LL << 40)));
+  EXPECT_THAT(HashValue("a").ExtractIndex(1LL << 50),
+              Eq(a.ExtractIndex(1LL << 50)));
+
+  EXPECT_THAT(a.ExtractIndex(8), Ne(b.ExtractIndex(8)));
+  EXPECT_THAT(a.ExtractIndex(8), Ne(empty.ExtractIndex(8)));
+
+  // Note that the index produced with a tag may be different from the index
+  // alone!
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<2>(2),
+              Eq(a.ExtractIndexAndTag<2>(2)));
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<16>(4),
+              Eq(a.ExtractIndexAndTag<16>(4)));
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(8),
+              Eq(a.ExtractIndexAndTag<7>(8)));
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1 << 10),
+              Eq(a.ExtractIndexAndTag<7>(1 << 10)));
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1 << 20),
+              Eq(a.ExtractIndexAndTag<7>(1 << 20)));
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1 << 30),
+              Eq(a.ExtractIndexAndTag<7>(1 << 30)));
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1LL << 40),
+              Eq(a.ExtractIndexAndTag<7>(1LL << 40)));
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1LL << 50),
+              Eq(a.ExtractIndexAndTag<7>(1LL << 50)));
+
+  const auto [a_index, a_tag] = a.ExtractIndexAndTag<4>(8);
+  const auto [b_index, b_tag] = b.ExtractIndexAndTag<4>(8);
+  EXPECT_THAT(a_index, Ne(b_index));
+  EXPECT_THAT(a_tag, Ne(b_tag));
+}
+
+TEST(HashingTest, Integers) {
+  for (int64_t i : {0, 1, 2, 3, 42, -1, -2, -3, -13}) {
+    SCOPED_TRACE(llvm::formatv("Hashing: {0}", i).str());
+    auto test_int_hash = [](auto i) {
+      using T = decltype(i);
+      SCOPED_TRACE(
+          llvm::formatv("Hashing type: {0}", llvm::getTypeName<T>()).str());
+      HashCode hash = HashValue(i);
+      // Hashes should be stable within the execution.
+      EXPECT_THAT(HashValue(i), Eq(hash));
+
+      // Zero should match, and other integers shouldn't collide trivially.
+      HashCode hash_zero = HashValue(static_cast<T>(0));
+      if (i == 0) {
+        EXPECT_THAT(hash, Eq(hash_zero));
+      } else {
+        EXPECT_THAT(hash, Ne(hash_zero));
+      }
+    };
+    test_int_hash(i);
+    test_int_hash(static_cast<int8_t>(i));
+    test_int_hash(static_cast<uint8_t>(i));
+    test_int_hash(static_cast<int16_t>(i));
+    test_int_hash(static_cast<uint16_t>(i));
+    test_int_hash(static_cast<int32_t>(i));
+    test_int_hash(static_cast<uint32_t>(i));
+    test_int_hash(static_cast<int64_t>(i));
+    test_int_hash(static_cast<uint64_t>(i));
+  }
+}
+
+TEST(HashingTest, Pointers) {
+  int object1 = 42;
+  std::string object2 =
+      "Hello World! This is a long-ish string so it ends up on the heap!";
+
+  HashCode hash_null = HashValue(nullptr);
+  // Hashes should be stable.
+  EXPECT_THAT(HashValue(nullptr), Eq(hash_null));
+
+  // Hash other kinds of pointers without trivial collisions.
+  HashCode hash1 = HashValue(&object1);
+  HashCode hash2 = HashValue(&object2);
+  HashCode hash3 = HashValue(object2.data());
+  EXPECT_THAT(hash1, Ne(hash_null));
+  EXPECT_THAT(hash2, Ne(hash_null));
+  EXPECT_THAT(hash3, Ne(hash_null));
+  EXPECT_THAT(hash1, Ne(hash2));
+  EXPECT_THAT(hash1, Ne(hash3));
+  EXPECT_THAT(hash2, Ne(hash3));
+
+  // Hash values reflect the address and not the type.
+  EXPECT_THAT(HashValue(static_cast<void*>(nullptr)), Eq(hash_null));
+  EXPECT_THAT(HashValue(static_cast<int*>(nullptr)), Eq(hash_null));
+  EXPECT_THAT(HashValue(static_cast<std::string*>(nullptr)), Eq(hash_null));
+  EXPECT_THAT(HashValue(reinterpret_cast<void*>(&object1)), Eq(hash1));
+  EXPECT_THAT(HashValue(reinterpret_cast<int*>(&object2)), Eq(hash2));
+  EXPECT_THAT(HashValue(reinterpret_cast<std::string*>(object2.data())),
+              Eq(hash3));
+}
+
+TEST(HashingTest, PairsAndTuples) {
+  // Note that we can't compare hash codes across arity, or in general, compare
+  // hash codes for different types as the type isn't part of the hash. These
+  // hashes are targeted at use in hash tables which pick a single type that's
+  // the basis of any comparison.
+  HashCode hash_00 = HashValue(std::pair(0, 0));
+  HashCode hash_01 = HashValue(std::pair(0, 1));
+  HashCode hash_10 = HashValue(std::pair(1, 0));
+  HashCode hash_11 = HashValue(std::pair(1, 1));
+  EXPECT_THAT(hash_00, Ne(hash_01));
+  EXPECT_THAT(hash_00, Ne(hash_10));
+  EXPECT_THAT(hash_00, Ne(hash_11));
+  EXPECT_THAT(hash_01, Ne(hash_10));
+  EXPECT_THAT(hash_01, Ne(hash_11));
+  EXPECT_THAT(hash_10, Ne(hash_11));
+
+  HashCode hash_000 = HashValue(std::tuple(0, 0, 0));
+  HashCode hash_001 = HashValue(std::tuple(0, 0, 1));
+  HashCode hash_010 = HashValue(std::tuple(0, 1, 0));
+  HashCode hash_011 = HashValue(std::tuple(0, 1, 1));
+  HashCode hash_100 = HashValue(std::tuple(1, 0, 0));
+  HashCode hash_101 = HashValue(std::tuple(1, 0, 1));
+  HashCode hash_110 = HashValue(std::tuple(1, 1, 0));
+  HashCode hash_111 = HashValue(std::tuple(1, 1, 1));
+  EXPECT_THAT(hash_000, Ne(hash_001));
+  EXPECT_THAT(hash_000, Ne(hash_010));
+  EXPECT_THAT(hash_000, Ne(hash_011));
+  EXPECT_THAT(hash_000, Ne(hash_100));
+  EXPECT_THAT(hash_000, Ne(hash_101));
+  EXPECT_THAT(hash_000, Ne(hash_110));
+  EXPECT_THAT(hash_000, Ne(hash_111));
+  EXPECT_THAT(hash_001, Ne(hash_010));
+  EXPECT_THAT(hash_001, Ne(hash_011));
+  EXPECT_THAT(hash_001, Ne(hash_100));
+  EXPECT_THAT(hash_001, Ne(hash_101));
+  EXPECT_THAT(hash_001, Ne(hash_110));
+  EXPECT_THAT(hash_001, Ne(hash_111));
+  EXPECT_THAT(hash_010, Ne(hash_011));
+  EXPECT_THAT(hash_010, Ne(hash_100));
+  EXPECT_THAT(hash_010, Ne(hash_101));
+  EXPECT_THAT(hash_010, Ne(hash_110));
+  EXPECT_THAT(hash_010, Ne(hash_111));
+  EXPECT_THAT(hash_011, Ne(hash_100));
+  EXPECT_THAT(hash_011, Ne(hash_101));
+  EXPECT_THAT(hash_011, Ne(hash_110));
+  EXPECT_THAT(hash_011, Ne(hash_111));
+  EXPECT_THAT(hash_100, Ne(hash_101));
+  EXPECT_THAT(hash_100, Ne(hash_110));
+  EXPECT_THAT(hash_100, Ne(hash_111));
+  EXPECT_THAT(hash_101, Ne(hash_110));
+  EXPECT_THAT(hash_101, Ne(hash_111));
+  EXPECT_THAT(hash_110, Ne(hash_111));
+
+  // Hashing a 2-tuple and a pair should produce identical results, so pairs
+  // are compatible with code using things like variadic tuple construction.
+  EXPECT_THAT(HashValue(std::tuple(0, 0)), Eq(hash_00));
+  EXPECT_THAT(HashValue(std::tuple(0, 1)), Eq(hash_01));
+  EXPECT_THAT(HashValue(std::tuple(1, 0)), Eq(hash_10));
+  EXPECT_THAT(HashValue(std::tuple(1, 1)), Eq(hash_11));
+
+  // Integers in tuples should also work.
+  for (int i : {0, 1, 2, 3, 42, -1, -2, -3, -13}) {
+    SCOPED_TRACE(llvm::formatv("Hashing: ({0}, {0}, {0})", i).str());
+    auto test_int_tuple_hash = [](auto i) {
+      using T = decltype(i);
+      SCOPED_TRACE(
+          llvm::formatv("Hashing integer type: {0}", llvm::getTypeName<T>())
+              .str());
+      std::tuple v = {i, i, i};
+      HashCode hash = HashValue(v);
+
+      // Hashes should be stable within the execution.
+      EXPECT_THAT(HashValue(v), Eq(hash));
+
+      // Zero should match, and other integers shouldn't collide trivially.
+      T zero = 0;
+      std::tuple zero_tuple = {zero, zero, zero};
+      HashCode hash_zero = HashValue(zero_tuple);
+      if (i == 0) {
+        EXPECT_THAT(hash, Eq(hash_zero));
+      } else {
+        EXPECT_THAT(hash, Ne(hash_zero));
+      }
+    };
+    test_int_tuple_hash(i);
+    test_int_tuple_hash(static_cast<int8_t>(i));
+    test_int_tuple_hash(static_cast<uint8_t>(i));
+    test_int_tuple_hash(static_cast<int16_t>(i));
+    test_int_tuple_hash(static_cast<uint16_t>(i));
+    test_int_tuple_hash(static_cast<int32_t>(i));
+    test_int_tuple_hash(static_cast<uint32_t>(i));
+    test_int_tuple_hash(static_cast<int64_t>(i));
+    test_int_tuple_hash(static_cast<uint64_t>(i));
+
+    // Heterogeneous integer types should also work, but we only support
+    // comparing against hashes of tuples with the exact same type.
+    using T1 = std::tuple<int8_t, uint32_t, int16_t>;
+    using T2 = std::tuple<uint32_t, int16_t, uint64_t>;
+    if (i == 0) {
+      EXPECT_THAT(HashValue(T1{i, i, i}), Eq(HashValue(T1{0, 0, 0})));
+      EXPECT_THAT(HashValue(T2{i, i, i}), Eq(HashValue(T2{0, 0, 0})));
+    } else {
+      EXPECT_THAT(HashValue(T1{i, i, i}), Ne(HashValue(T1{0, 0, 0})));
+      EXPECT_THAT(HashValue(T2{i, i, i}), Ne(HashValue(T2{0, 0, 0})));
+    }
+  }
+
+  // Hash values of pointers in pairs and tuples reflect the address and not the
+  // type. Pairs and 2-tuples give the same hash values.
+  HashCode hash_2null = HashValue(std::pair(nullptr, nullptr));
+  EXPECT_THAT(HashValue(std::tuple(static_cast<int*>(nullptr),
+                                   static_cast<double*>(nullptr))),
+              Eq(hash_2null));
+
+  // Hash other kinds of pointers without trivial collisions.
+  int object1 = 42;
+  std::string object2 = "Hello world!";
+  HashCode hash_3ptr =
+      HashValue(std::tuple(&object1, &object2, object2.data()));
+  EXPECT_THAT(hash_3ptr, Ne(HashValue(std::tuple(nullptr, nullptr, nullptr))));
+
+  // Hash values reflect the address and not the type.
+  EXPECT_THAT(
+      HashValue(std::tuple(reinterpret_cast<void*>(&object1),
+                           reinterpret_cast<int*>(&object2),
+                           reinterpret_cast<std::string*>(object2.data()))),
+      Eq(hash_3ptr));
+}
+
+TEST(HashingTest, BasicStrings) {
+  llvm::SmallVector<std::pair<std::string, HashCode>> hashes;
+  for (int size : {0, 1, 2, 4, 16, 64, 256, 1024}) {
+    std::string s(size, 'a');
+    hashes.push_back({s, HashValue(s)});
+  }
+  for (const auto& [s1, hash1] : hashes) {
+    EXPECT_THAT(HashValue(s1), Eq(hash1));
+    // Also check that we get the same hashes even when using string-wrapping
+    // types.
+    EXPECT_THAT(HashValue(std::string_view(s1)), Eq(hash1));
+    EXPECT_THAT(HashValue(llvm::StringRef(s1)), Eq(hash1));
+
+    // And some basic tests that simple things don't collide.
+    for (const auto& [s2, hash2] : hashes) {
+      if (s1 != s2) {
+        EXPECT_THAT(hash1, Ne(hash2))
+            << "Matching hashes for '" << s1 << "' and '" << s2 << "'";
+      }
+    }
+  }
+}
+
+struct HashableType {
+  int x;
+  int y;
+
+  int ignored = 0;
+
+  friend auto CarbonHashValue(const HashableType& value, uint64_t seed)
+      -> HashCode {
+    Hasher hasher(seed);
+    hasher.Hash(value.x, value.y);
+    return static_cast<HashCode>(hasher);
+  }
+};
+
+TEST(HashingTest, CustomType) {
+  HashableType a = {.x = 1, .y = 2};
+  HashableType b = {.x = 3, .y = 4};
+
+  EXPECT_THAT(HashValue(a), Eq(HashValue(a)));
+  EXPECT_THAT(HashValue(a), Ne(HashValue(b)));
+
+  // Differences in an ignored field have no impact.
+  HashableType c = {.x = 3, .y = 4, .ignored = 42};
+  EXPECT_THAT(HashValue(c), Eq(HashValue(b)));
+}
+
+// The only significantly bad seed is zero, so pick a non-zero seed with a tiny
+// amount of entropy to make sure that none of the testing relies on the entropy
+// from this.
+constexpr uint64_t TestSeed = 42ULL * 1024;
+
+auto ToHexBytes(llvm::StringRef s) -> std::string {
+  std::string rendered;
+  llvm::raw_string_ostream os(rendered);
+  os << "{";
+  llvm::ListSeparator sep(", ");
+  for (const char c : s) {
+    os << sep << llvm::formatv("{0:x2}", static_cast<uint8_t>(c));
+  }
+  os << "}";
+  return rendered;
+}
+
+template <typename T>
+struct HashedValue {
+  HashCode hash;
+  T v;
+};
+
+using HashedString = HashedValue<std::string>;
+
+template <typename T>
+auto PrintFullWidthHex(llvm::raw_ostream& os, T value) {
+  static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
+                sizeof(T) == 8);
+  os << llvm::formatv(sizeof(T) == 1   ? "{0:x2}"
+                      : sizeof(T) == 2 ? "{0:x4}"
+                      : sizeof(T) == 4 ? "{0:x8}"
+                                       : "{0:x16}",
+                      static_cast<uint64_t>(value));
+}
+
+template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
+auto operator<<(llvm::raw_ostream& os, HashedValue<T> hv)
+    -> llvm::raw_ostream& {
+  os << "hash " << hv.hash << " for value ";
+  PrintFullWidthHex(os, hv.v);
+  return os;
+}
+
+template <typename T, typename U,
+          typename = std::enable_if_t<std::is_integral_v<T>>,
+          typename = std::enable_if_t<std::is_integral_v<U>>>
+auto operator<<(llvm::raw_ostream& os, HashedValue<std::pair<T, U>> hv)
+    -> llvm::raw_ostream& {
+  os << "hash " << hv.hash << " for pair of ";
+  PrintFullWidthHex(os, hv.v.first);
+  os << " and ";
+  PrintFullWidthHex(os, hv.v.second);
+  return os;
+}
+
+struct Collisions {
+  int total;
+  int median;
+  int max;
+};
+
+// Analyzes a list of hashed values to find all of the hash codes which collide
+// within a specific bit-range.
+//
+// With `BitBegin=0` and `BitEnd=64`, this is equivalent to finding full
+// collisions. But when the begin and end of the bit range are narrower than the
+// 64-bits of the hash code, it allows this function to analyze a specific
+// window of bits within the 64-bit hash code to understand how many collisions
+// emerge purely within that bit range.
+//
+// With narrow ranges (we often look at the first N and last N bits for small
+// N), collisions are common, so this function summarizes them with the total
+// number of collisions and the median number of collisions for an input value.
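+//
+// As an illustrative example, `FindBitRangeCollisions<0, 7>(hashes)` examines
+// only the low 7 bits of each hash -- the bits a hash table tag would typically
+// consume.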
+template <int BitBegin, int BitEnd, typename T>
+auto FindBitRangeCollisions(llvm::ArrayRef<HashedValue<T>> hashes)
+    -> Collisions {
+  static_assert(BitBegin < BitEnd);
+  constexpr int BitCount = BitEnd - BitBegin;
+  static_assert(BitCount <= 32);
+  constexpr int BitShift = BitBegin;
+  constexpr uint64_t BitMask = ((1ULL << BitCount) - 1) << BitShift;
+
+  // We collect counts of collisions in a vector. Initially, we just have a zero
+  // and all inputs map to that collision count. As we discover collisions,
+  // we'll create a dedicated counter for it and count how many inputs collide.
+  llvm::SmallVector<int> collision_counts;
+  collision_counts.push_back(0);
+  // The "map" for collision counts. Each input hashed value has a corresponding
+  // index stored here. That index is the index of the collision count in the
+  // container above. We resize this to fill it with zeros to start as the zero
+  // index above has a collision count of zero.
+  //
+  // The result of this is that the number of collisions for `hashes[i]` is
+  // `collision_counts[collision_map[i]]`.
+  llvm::SmallVector<int> collision_map;
+  collision_map.resize(hashes.size());
+
+  // First, we extract the bit subsequence we want to examine from each hash and
+  // store it with an index back into the hashed values (or the collision map).
+  //
+  // The result is that, `bits_and_indices[i].bits` has the hash bits of
+  // interest from `hashes[bits_and_indices[i].index]`.
+  //
+  // And because `collision_map` above uses the same indices as `hashes`,
+  // `collision_counts[collision_map[bits_and_indices[i].index]]` is the number
+  // of collisions for `bits_and_indices[i].bits`.
+  struct BitSequenceAndHashIndex {
+    // The bit subsequence of a hash input, adjusted into the low bits.
+    uint32_t bits;
+    // The index of the hash input corresponding to this bit sequence.
+    int index;
+  };
+  llvm::SmallVector<BitSequenceAndHashIndex> bits_and_indices;
+  bits_and_indices.reserve(hashes.size());
+  for (const auto& [hash, v] : hashes) {
+    CARBON_DCHECK(v == hashes[bits_and_indices.size()].v);
+    auto hash_bits = (static_cast<uint64_t>(hash) & BitMask) >> BitShift;
+    bits_and_indices.push_back(
+        {.bits = static_cast<uint32_t>(hash_bits),
+         .index = static_cast<int>(bits_and_indices.size())});
+  }
+
+  // Now we sort by the extracted bit sequence so we can efficiently scan for
+  // colliding bit patterns.
+  std::sort(
+      bits_and_indices.begin(), bits_and_indices.end(),
+      [](const auto& lhs, const auto& rhs) { return lhs.bits < rhs.bits; });
+
+  // Scan the sorted bit sequences we've extracted looking for collisions. We
+  // count the total collisions, but we also track the number of individual
+  // inputs that collide with each specific bit pattern.
+  uint32_t prev_hash_bits = bits_and_indices[0].bits;
+  int prev_index = bits_and_indices[0].index;
+  bool in_collision = false;
+  int total = 0;
+  for (const auto& [hash_bits, hash_index] :
+       llvm::ArrayRef(bits_and_indices).slice(1)) {
+    // Check if we've found a new hash (and thus a new value), reset everything.
+    CARBON_CHECK(hashes[prev_index].v != hashes[hash_index].v);
+    if (hash_bits != prev_hash_bits) {
+      CARBON_CHECK(hashes[prev_index].hash != hashes[hash_index].hash);
+      prev_hash_bits = hash_bits;
+      prev_index = hash_index;
+      in_collision = false;
+      continue;
+    }
+
+    // Otherwise, we have a colliding bit sequence.
+    ++total;
+
+    // If we've already created a collision count to track this, just increment
+    // it and map this hash to it.
+    if (in_collision) {
+      ++collision_counts.back();
+      collision_map[hash_index] = collision_counts.size() - 1;
+      continue;
+    }
+
+    // If this is a new collision, create a dedicated count to track it and
+    // begin counting.
+    in_collision = true;
+    collision_map[prev_index] = collision_counts.size();
+    collision_map[hash_index] = collision_counts.size();
+    collision_counts.push_back(1);
+  }
+
+  // Sort by collision count for each hash.
+  std::sort(bits_and_indices.begin(), bits_and_indices.end(),
+            [&](const auto& lhs, const auto& rhs) {
+              return collision_counts[collision_map[lhs.index]] <
+                     collision_counts[collision_map[rhs.index]];
+            });
+
+  // And compute the median and max.
+  int median = collision_counts
+      [collision_map[bits_and_indices[bits_and_indices.size() / 2].index]];
+  int max = *std::max_element(collision_counts.begin(), collision_counts.end());
+  CARBON_CHECK(max ==
+               collision_counts[collision_map[bits_and_indices.back().index]]);
+  return {.total = total, .median = median, .max = max};
+}
+
+auto CheckNoDuplicateValues(llvm::ArrayRef<HashedString> hashes) -> void {
+  for (int i = 0, size = hashes.size(); i < size - 1; ++i) {
+    const auto& [_, value] = hashes[i];
+    CARBON_CHECK(value != hashes[i + 1].v) << "Duplicate value: " << value;
+  }
+}
+
+template <int N>
+auto AllByteStringsHashedAndSorted() {
+  static_assert(N < 5, "Can only generate all 4-byte strings or shorter.");
+
+  llvm::SmallVector<HashedString> hashes;
+  int64_t count = 1LL << (N * 8);
+  for (int64_t i : llvm::seq(count)) {
+    uint8_t bytes[N];
+    for (int j : llvm::seq(N)) {
+      bytes[j] = (static_cast<uint64_t>(i) >> (8 * j)) & 0xff;
+    }
+    std::string s(std::begin(bytes), std::end(bytes));
+    hashes.push_back({HashValue(s, TestSeed), s});
+  }
+
+  std::sort(hashes.begin(), hashes.end(),
+            [](const HashedString& lhs, const HashedString& rhs) {
+              return static_cast<uint64_t>(lhs.hash) <
+                     static_cast<uint64_t>(rhs.hash);
+            });
+  CheckNoDuplicateValues(hashes);
+
+  return hashes;
+}
+
+auto ExpectNoHashCollisions(llvm::ArrayRef<HashedString> hashes) -> void {
+  HashCode prev_hash = hashes[0].hash;
+  llvm::StringRef prev_s = hashes[0].v;
+  for (const auto& [hash, s] : hashes.slice(1)) {
+    if (hash != prev_hash) {
+      prev_hash = hash;
+      prev_s = s;
+      continue;
+    }
+
+    FAIL() << "Colliding hash '" << hash << "' of strings "
+           << ToHexBytes(prev_s) << " and " << ToHexBytes(s);
+  }
+}
+
+TEST(HashingTest, Collisions1ByteSized) {
+  auto hashes_storage = AllByteStringsHashedAndSorted<1>();
+  auto hashes = llvm::ArrayRef(hashes_storage);
+  ExpectNoHashCollisions(hashes);
+
+  auto low_32bit_collisions = FindBitRangeCollisions<0, 32>(hashes);
+  EXPECT_THAT(low_32bit_collisions.total, Eq(0));
+  auto high_32bit_collisions = FindBitRangeCollisions<32, 64>(hashes);
+  EXPECT_THAT(high_32bit_collisions.total, Eq(0));
+
+  // We expect collisions when only looking at 7-bits of the hash. However,
+  // modern hash table designs need to use either the low or high 7 bits as tags
+  // for faster searching. So we add some direct testing that the median and max
+  // collisions for any given key stay within bounds. We express the bounds in
+  // terms of the minimum expected "perfect" rate of collisions if uniformly
+  // distributed.
+  int min_7bit_collisions = llvm::NextPowerOf2(hashes.size() - 1) / (1 << 7);
+  auto low_7bit_collisions = FindBitRangeCollisions<0, 7>(hashes);
+  EXPECT_THAT(low_7bit_collisions.median, Le(2 * min_7bit_collisions));
+  EXPECT_THAT(low_7bit_collisions.max, Le(4 * min_7bit_collisions));
+  auto high_7bit_collisions = FindBitRangeCollisions<64 - 7, 64>(hashes);
+  EXPECT_THAT(high_7bit_collisions.median, Le(2 * min_7bit_collisions));
+  EXPECT_THAT(high_7bit_collisions.max, Le(4 * min_7bit_collisions));
+}
+
+TEST(HashingTest, Collisions2ByteSized) {
+  auto hashes_storage = AllByteStringsHashedAndSorted<2>();
+  auto hashes = llvm::ArrayRef(hashes_storage);
+  ExpectNoHashCollisions(hashes);
+
+  auto low_32bit_collisions = FindBitRangeCollisions<0, 32>(hashes);
+  EXPECT_THAT(low_32bit_collisions.total, Eq(0));
+  auto high_32bit_collisions = FindBitRangeCollisions<32, 64>(hashes);
+  EXPECT_THAT(high_32bit_collisions.total, Eq(0));
+
+  // Similar to 1-byte keys, we do expect a certain rate of collisions here but
+  // bound the median and max.
+  int min_7bit_collisions = llvm::NextPowerOf2(hashes.size() - 1) / (1 << 7);
+  auto low_7bit_collisions = FindBitRangeCollisions<0, 7>(hashes);
+  EXPECT_THAT(low_7bit_collisions.median, Le(2 * min_7bit_collisions));
+  EXPECT_THAT(low_7bit_collisions.max, Le(2 * min_7bit_collisions));
+  auto high_7bit_collisions = FindBitRangeCollisions<64 - 7, 64>(hashes);
+  EXPECT_THAT(high_7bit_collisions.median, Le(2 * min_7bit_collisions));
+  EXPECT_THAT(high_7bit_collisions.max, Le(2 * min_7bit_collisions));
+}
+
+// Generate and hash all strings of [BeginByteCount, EndByteCount) bytes,
+// with [BeginSetBitCount, EndSetBitCount) contiguous bits at each possible bit
+// offset set to one and all other bits set to zero.
+template <int BeginByteCount, int EndByteCount, int BeginSetBitCount,
+          int EndSetBitCount>
+struct SparseHashTestParamRanges {
+  static_assert(BeginByteCount >= 0);
+  static_assert(BeginByteCount < EndByteCount);
+  static_assert(BeginSetBitCount >= 0);
+  static_assert(BeginSetBitCount < EndSetBitCount);
+  // Note that we intentionally allow the end-set-bit-count to result in more
+  // set bits than are available -- we truncate the number of set bits to fit
+  // within the byte string.
+  static_assert(BeginSetBitCount <= BeginByteCount * 8);
+
+  struct ByteCount {
+    static constexpr int Begin = BeginByteCount;
+    static constexpr int End = EndByteCount;
+  };
+  struct SetBitCount {
+    static constexpr int Begin = BeginSetBitCount;
+    static constexpr int End = EndSetBitCount;
+  };
+};
+
+template <typename ParamRanges>
+struct SparseHashTest : ::testing::Test {
+  using ByteCount = typename ParamRanges::ByteCount;
+  using SetBitCount = typename ParamRanges::SetBitCount;
+
+  static auto GetHashedByteStrings() {
+    llvm::SmallVector<HashedString> hashes;
+    for (int byte_count :
+         llvm::seq_inclusive(ByteCount::Begin, ByteCount::End)) {
+      int bits = byte_count * 8;
+      for (int set_bit_count : llvm::seq_inclusive(
+               SetBitCount::Begin, std::min(bits, SetBitCount::End))) {
+        if (set_bit_count == 0) {
+          std::string s(byte_count, '\0');
+          hashes.push_back({HashValue(s, TestSeed), std::move(s)});
+          continue;
+        }
+        for (int begin_set_bit : llvm::seq_inclusive(0, bits - set_bit_count)) {
+          std::string s(byte_count, '\0');
+
+          int begin_set_bit_byte_index = begin_set_bit / 8;
+          int begin_set_bit_bit_index = begin_set_bit % 8;
+          int end_set_bit_byte_index = (begin_set_bit + set_bit_count) / 8;
+          int end_set_bit_bit_index = (begin_set_bit + set_bit_count) % 8;
+
+          // We build a begin byte and end byte. We set the begin byte, set
+          // subsequent bytes up to *and including* the end byte to all ones,
+          // and then mask the end byte. For multi-byte runs, the mask just sets
+          // the end byte and for single-byte runs the mask computes the
+          // intersecting bits.
+          //
+          // Consider a 4-set-bit count, starting at bit 2. The begin bit index
+          // is 2, and the end bit index is 6.
+          //
+          // Begin byte:  0b11111111 -(shl 2)-----> 0b11111100
+          // End byte:    0b11111111 -(shr (8-6))-> 0b00111111
+          // Masked byte:                           0b00111100
+          //
+          // Or a 10-set-bit-count starting at bit 2. The begin bit index is 2,
+          // the end byte index is (12 / 8) or 1, and the end bit index is (12 %
+          // 8) or 4.
+          //
+          // Begin byte:  0b11111111 -(shl 2)-----> 0b11111100 -> 6 bits
+          // End byte:    0b11111111 -(shr (8-4))-> 0b00001111 -> 4 bits
+          //                                                      10 total bits
+          //
+          uint8_t begin_set_bit_byte = 0xFFU << begin_set_bit_bit_index;
+          uint8_t end_set_bit_byte = 0xFFU >> (8 - end_set_bit_bit_index);
+          bool has_end_byte_bits = end_set_bit_byte != 0;
+          s[begin_set_bit_byte_index] = begin_set_bit_byte;
+          for (int i : llvm::seq(begin_set_bit_byte_index + 1,
+                                 end_set_bit_byte_index + has_end_byte_bits)) {
+            s[i] = '\xFF';
+          }
+          // If there are no bits set in the end byte, it may be past-the-end
+          // and we can't even mask a zero byte safely.
+          if (has_end_byte_bits) {
+            s[end_set_bit_byte_index] &= end_set_bit_byte;
+          }
+          hashes.push_back({HashValue(s, TestSeed), std::move(s)});
+        }
+      }
+    }
+
+    std::sort(hashes.begin(), hashes.end(),
+              [](const HashedString& lhs, const HashedString& rhs) {
+                return static_cast<uint64_t>(lhs.hash) <
+                       static_cast<uint64_t>(rhs.hash);
+              });
+    CheckNoDuplicateValues(hashes);
+
+    return hashes;
+  }
+};
+
+using SparseHashTestParams = ::testing::Types<
+    SparseHashTestParamRanges</*BeginByteCount=*/0, /*EndByteCount=*/256,
+                              /*BeginSetBitCount=*/0, /*EndSetBitCount=*/1>,
+    SparseHashTestParamRanges</*BeginByteCount=*/1, /*EndByteCount=*/128,
+                              /*BeginSetBitCount=*/2, /*EndSetBitCount=*/4>,
+    SparseHashTestParamRanges</*BeginByteCount=*/1, /*EndByteCount=*/64,
+                              /*BeginSetBitCount=*/4, /*EndSetBitCount=*/16>>;
+TYPED_TEST_SUITE(SparseHashTest, SparseHashTestParams);
+
+TYPED_TEST(SparseHashTest, Collisions) {
+  auto hashes_storage = this->GetHashedByteStrings();
+  auto hashes = llvm::ArrayRef(hashes_storage);
+  ExpectNoHashCollisions(hashes);
+
+  int min_7bit_collisions = llvm::NextPowerOf2(hashes.size() - 1) / (1 << 7);
+  auto low_7bit_collisions = FindBitRangeCollisions<0, 7>(hashes);
+  EXPECT_THAT(low_7bit_collisions.median, Le(2 * min_7bit_collisions));
+  EXPECT_THAT(low_7bit_collisions.max, Le(2 * min_7bit_collisions));
+  auto high_7bit_collisions = FindBitRangeCollisions<64 - 7, 64>(hashes);
+  EXPECT_THAT(high_7bit_collisions.median, Le(2 * min_7bit_collisions));
+  EXPECT_THAT(high_7bit_collisions.max, Le(2 * min_7bit_collisions));
+}
+
+}  // namespace
+}  // namespace Carbon