
A collection of hashing improvements motivated by hashtable use. (#4094)

LLVM's `APInt` and `APFloat` need specialized handling to be used
effectively in hashtables. We can't inject overrides into LLVM, so we
handle these types directly in our hashing routines.
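
A minimal sketch of the resulting behavior, mirroring the new `HashAPInt` and
`HashAPFloat` tests below (the `ApHashingSketch` name is just for
illustration):

```cpp
#include "common/hashing.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"

// Illustrative only; not part of this change.
static auto ApHashingSketch() -> void {
  using Carbon::HashValue;

  // The bit width is hashed along with the value, so equal values at
  // different widths get different hashes.
  llvm::APInt one_64(/*numBits=*/64, /*val=*/1);
  llvm::APInt one_128(/*numBits=*/128, /*val=*/1);
  CARBON_CHECK(HashValue(one_64) != HashValue(one_128));

  // `APFloat` delegates to LLVM's `hash_value` and re-mixes in our seed;
  // bitwise-distinct values such as `0.0` and `-0.0` hash differently.
  llvm::APFloat zero = llvm::APFloat::getZero(llvm::APFloat::IEEEsingle());
  llvm::APFloat neg_zero =
      llvm::APFloat::getZero(llvm::APFloat::IEEEsingle(), /*Negative=*/true);
  CARBON_CHECK(HashValue(zero) != HashValue(neg_zero));
}
```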

There were also problematic limits on hashing pairs and tuples. First,
the unique-object-representation hashing of pairs was more restricted
than that of tuples, an asymmetry that was both problematic and
unnecessary. The larger issue is that we didn't support recursive
hashing of elements when necessary. That requires a careful predicate
to avoid infinite recursion, but it lets us handle important use cases
for hashtables keyed on a tuple.
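
For example (a hedged sketch following the new `TupleRecursion` test;
`TupleKeySketch` is an illustrative name), a pair or tuple containing
`llvm::APInt` elements now hashes each element through its `CarbonHashValue`
overload rather than treating the whole object as raw storage:

```cpp
#include <tuple>
#include <utility>

#include "common/hashing.h"
#include "llvm/ADT/APInt.h"

// Illustrative only; not part of this change.
static auto TupleKeySketch() -> void {
  using Carbon::HashValue;
  llvm::APInt one_64(/*numBits=*/64, /*val=*/1);
  llvm::APInt two_64(/*numBits=*/64, /*val=*/2);
  llvm::APInt one_128(/*numBits=*/128, /*val=*/1);

  // Each element is hashed recursively, so multi-word `APInt`s work and
  // element order matters.
  CARBON_CHECK(HashValue(std::pair{one_64, one_128}) ==
               HashValue(std::pair{one_64, one_128}));
  CARBON_CHECK(HashValue(std::pair{one_64, one_128}) !=
               HashValue(std::pair{one_128, one_64}));
  CARBON_CHECK(HashValue(std::tuple{one_64, one_128, two_64}) !=
               HashValue(std::tuple{one_64, one_64, two_64}));
}
```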

Also added support for hashing arrays whose elements must be hashed
recursively, in addition to arrays where we can hash the raw storage,
and added overloads that redirect various array-like types to the
common array handling.
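
A short sketch of the array-like handling, mirroring the new `ArrayLike` test
(`ArrayLikeSketch` is an illustrative name): the containers all redirect to
the common `llvm::ArrayRef` hashing.

```cpp
#include <array>
#include <vector>

#include "common/hashing.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

// Illustrative only; not part of this change.
static auto ArrayLikeSketch() -> void {
  using Carbon::HashValue;
  int c_array[] = {1, 2, 3, 4};
  llvm::ArrayRef<int> ref = c_array;

  // C arrays, `std::array`, `std::vector`, and `llvm::SmallVector` all hash
  // the same as the equivalent `ArrayRef`, since `int` elements can be
  // hashed as raw storage.
  CARBON_CHECK(HashValue(c_array) == HashValue(ref));
  CARBON_CHECK(HashValue(std::array{1, 2, 3, 4}) == HashValue(ref));
  CARBON_CHECK(HashValue(std::vector{1, 2, 3, 4}) == HashValue(ref));
  CARBON_CHECK(HashValue(llvm::SmallVector<int>{1, 2, 3, 4}) ==
               HashValue(ref));
}
```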

Last but not least, reworked the constraint model for hashing as raw
data so that it no longer overrides custom hashing functions.
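
A hedged sketch of what the reworked constraints guarantee, following the
updated `HashableType` test (the `Example` namespace and `Coord` type are
hypothetical): a type can look like plain data, but an explicit
`CarbonHashValue` overload is still preferred over the raw-data fallback.

```cpp
#include <cstdint>
#include <type_traits>

#include "common/hashing.h"

// Hypothetical example type; not part of this change.
namespace Example {

struct Coord {
  int8_t x;
  int8_t y;
  // Deliberately excluded from the custom hash below.
  int16_t ignored = 0;

  friend auto CarbonHashValue(const Coord& value, uint64_t seed)
      -> Carbon::HashCode {
    Carbon::Hasher hasher(seed);
    hasher.Hash(value.x, value.y);
    return static_cast<Carbon::HashCode>(hasher);
  }
};
// The type looks hashable as raw data...
static_assert(std::has_unique_object_representations_v<Coord>);

static auto CustomHashSketch() -> void {
  Coord a = {.x = 1, .y = 2};
  Coord b = {.x = 1, .y = 2, .ignored = 42};
  // ...but the explicit overload still wins, so `ignored` has no effect.
  CARBON_CHECK(Carbon::HashValue(a) == Carbon::HashValue(b));
}

}  // namespace Example
```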

---------

Co-authored-by: Richard Smith <richard@metafoo.co.uk>
Co-authored-by: Carbon Infra Bot <carbon-external-infra@google.com>
Chandler Carruth, 1 year ago
parent commit bf736e6b03
5 changed files with 404 additions and 110 deletions
  1. common/hashing.h (+240 −100)
  2. common/hashing_test.cpp (+160 −6)
  3. common/raw_hashtable_test_helpers.h (+2 −2)
  4. toolchain/sem_ir/bind_name.h (+1 −1)
  5. toolchain/sem_ir/inst.h (+1 −1)

+ 240 - 100
common/hashing.h

@@ -13,6 +13,8 @@
 
 #include "common/check.h"
 #include "common/ostream.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -233,6 +235,34 @@ class Hasher {
   // Extracts the current state as a `HashCode` for use.
   explicit operator HashCode() const { return HashCode(buffer); }
 
+  // Incorporates a variable number of objects into the hasher's state.
+  //
+  // The `values` here can be anything hashable, and this routine handles
+  // recursively hashing a single value as appropriate and then in turn
+  // incorporating that. However, it is optimized for relatively small numbers
+  // of values and/or small elements. A large tree structure will be better
+  // handled by a dedicated Merkle-tree decomposition rather than the ad-hoc one
+  // provided here. This routine's decomposition is mostly useful for combining
+  // N small bits of data with one recursively hashed entity.
+  //
+  // There is no guaranteed correspondence between the behavior of a single call
+  // with multiple parameters and multiple calls.
+  template <typename... Ts>
+  auto Hash(const Ts&... values) -> void;
+
+  // Incorporates an array of objects into the hasher's state.
+  //
+  // Similar to the variadic `Hash`, this will handle recursively hashing if
+  // necessary, but is optimized to avoid it when possible and is especially
+  // efficient when hashing a raw array of bytes.
+  //
+  // Note that this is especially inefficient when it must recursively hash for
+  // long arrays -- that pattern should be avoided if possible. It is usually
+  // more effective to optimize that pattern at a higher level with a dedicated
+  // hashing implementation.
+  template <typename T>
+  auto HashArray(llvm::ArrayRef<T> values) -> void;
+
   // Incorporates an object into the hasher's state by hashing its object
   // representation. Requires `value`'s type to have a unique object
   // representation. This is primarily useful for builtin and primitive types.
@@ -242,24 +272,7 @@ class Hasher {
   // aggregating several primitive types into a hash.
   template <typename T>
     requires std::has_unique_object_representations_v<T>
-  auto Hash(const T& value) -> void;
-
-  // Incorporates a variable number of objects into the `hasher`s state in a
-  // similar manner to applying the above function to each one in series. It has
-  // the same requirements as the above function for each `value`. And it
-  // returns the updated `hasher`.
-  //
-  // There is no guaranteed correspondence between the behavior of a single call
-  // with multiple parameters and multiple calls. This routine is also optimized
-  // for handling relatively small numbers of objects. For hashing large
-  // aggregations, consider some Merkle-tree decomposition or arranging for a
-  // byte buffer that can be hashed as a single buffer. However, hashing large
-  // aggregations of data in this way is rarely results in effectively
-  // high-performance hash table data structures and so should generally be
-  // avoided.
-  template <typename... Ts>
-    requires(... && std::has_unique_object_representations_v<Ts>)
-  auto Hash(const Ts&... value) -> void;
+  auto HashRaw(const T& value) -> void;
 
   // Simpler and more primitive functions to incorporate state represented in
   // `uint64_t` values into the hasher's state.
@@ -443,6 +456,14 @@ class Hasher {
 // public API. They should not be used directly by client code.
 namespace InternalHashDispatch {
 
+template <typename T>
+inline auto CarbonHashValue(llvm::ArrayRef<T> values, uint64_t seed)
+    -> HashCode {
+  Hasher hasher(seed);
+  hasher.HashArray(values);
+  return static_cast<HashCode>(hasher);
+}
+
 inline auto CarbonHashValue(llvm::ArrayRef<std::byte> bytes, uint64_t seed)
     -> HashCode {
   Hasher hasher(seed);
@@ -474,82 +495,140 @@ inline auto CarbonHashValue(const llvm::SmallString<Length>& value,
   return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
 }
 
-// C++ guarantees this is true for the unsigned variants, but we require it for
-// signed variants and pointers.
-static_assert(std::has_unique_object_representations_v<int8_t>);
-static_assert(std::has_unique_object_representations_v<int16_t>);
-static_assert(std::has_unique_object_representations_v<int32_t>);
-static_assert(std::has_unique_object_representations_v<int64_t>);
-static_assert(std::has_unique_object_representations_v<void*>);
-
-// C++ uses `std::nullptr_t` but unfortunately doesn't make it have a unique
-// object representation. To address that, we need a function that converts
-// `nullptr` back into a `void*` that will have a unique object representation.
-// And this needs to be done by-value as we need to build a temporary object to
-// return, which requires a separate overload rather than just using a type
-// function that could be used in parallel in the predicate below. Instead, we
-// build the predicate independently of the mapping overload, but together they
-// should produce the correct result.
+// Support types that are array-like by building an `llvm::ArrayRef` out of
+// them. We can't do this by accepting any type convertible to an `ArrayRef`
+// because that type supports building a synthetic array out of any single
+// element.
 template <typename T>
-inline auto MapNullPtrToVoidPtr(const T& value) -> const T& {
-  // This overload should never be selected for `std::nullptr_t`, so
-  // static_assert to get some better compiler error messages.
-  static_assert(!std::same_as<T, std::nullptr_t>);
-  return value;
+inline auto CarbonHashValue(const std::vector<T>& arg, uint64_t seed)
+    -> HashCode {
+  return CarbonHashValue(llvm::ArrayRef(arg), seed);
 }
-inline auto MapNullPtrToVoidPtr(std::nullptr_t /*value*/) -> const void* {
-  return nullptr;
+template <typename T>
+inline auto CarbonHashValue(const llvm::SmallVectorImpl<T>& arg, uint64_t seed)
+    -> HashCode {
+  return CarbonHashValue(llvm::ArrayRef(arg), seed);
+}
+template <typename T, size_t N>
+inline auto CarbonHashValue(const std::array<T, N>& arg, uint64_t seed)
+    -> HashCode {
+  return CarbonHashValue(llvm::ArrayRef(arg), seed);
+}
+template <typename T, size_t N>
+inline auto CarbonHashValue(const T (&arg)[N], uint64_t seed) -> HashCode {
+  return CarbonHashValue(llvm::ArrayRef(arg), seed);
 }
 
-// Implementation detail predicate to be used in conjunction with a `nullptr`
-// mapping routine like the above.
-template <typename T>
-concept NullPtrOrHasUniqueObjectRepresentations =
-    std::same_as<T, std::nullptr_t> ||
-    std::has_unique_object_representations_v<T>;
+inline auto CarbonHashValue(llvm::APInt value, uint64_t seed) -> HashCode {
+  Hasher hasher(seed);
+  if (LLVM_LIKELY(value.isSingleWord())) {
+    hasher.Hash(value.getBitWidth(), value.getZExtValue());
+  } else {
+    hasher.HashRaw(value.getBitWidth());
+    hasher.HashSizedBytes(
+        llvm::ArrayRef(value.getRawData(), value.getNumWords()));
+  }
+  return static_cast<HashCode>(hasher);
+}
 
-template <typename T>
-  requires NullPtrOrHasUniqueObjectRepresentations<T>
-inline auto CarbonHashValue(const T& value, uint64_t seed) -> HashCode {
+inline auto CarbonHashValue(llvm::APFloat value, uint64_t seed) -> HashCode {
   Hasher hasher(seed);
-  hasher.Hash(MapNullPtrToVoidPtr(value));
+  // Hashing floating point numbers is complex and depends on the specific
+  // internal semantics of `APFloat`, so delegate to the LLVM hashing framework
+  // here. We re-hash the result to mix in our seed. All of this is a bit
+  // inefficient, and we can revisit this to provide a dedicated implementation
+  // if it becomes a bottleneck.
+  using llvm::hash_value;
+  hasher.HashRaw(hash_value(value));
   return static_cast<HashCode>(hasher);
 }
 
 template <typename... Ts>
-  requires(... && NullPtrOrHasUniqueObjectRepresentations<Ts>)
 inline auto CarbonHashValue(const std::tuple<Ts...>& value, uint64_t seed)
     -> HashCode {
   Hasher hasher(seed);
-  std::apply(
-      [&](const auto&... args) { hasher.Hash(MapNullPtrToVoidPtr(args)...); },
-      value);
+  std::apply([&](const auto&... args) { hasher.Hash(args...); }, value);
   return static_cast<HashCode>(hasher);
 }
 
 template <typename T, typename U>
-  requires NullPtrOrHasUniqueObjectRepresentations<T> &&
-           NullPtrOrHasUniqueObjectRepresentations<U> &&
-           (sizeof(T) <= sizeof(uint64_t) && sizeof(U) <= sizeof(uint64_t))
 inline auto CarbonHashValue(const std::pair<T, U>& value, uint64_t seed)
     -> HashCode {
-  return CarbonHashValue(std::tuple(value.first, value.second), seed);
+  Hasher hasher(seed);
+  hasher.Hash(value.first, value.second);
+  return static_cast<HashCode>(hasher);
 }
 
+// Implementation detail predicate to detect if there is a `CarbonHashValue`
+// overload available for a particular type, either in this namespace or found
+// via ADL. Note that this should not be moved above any overloads.
 template <typename T>
-  requires std::has_unique_object_representations_v<T>
-inline auto CarbonHashValue(llvm::ArrayRef<T> objs, uint64_t seed) -> HashCode {
-  return CarbonHashValue(
-      llvm::ArrayRef(reinterpret_cast<const std::byte*>(objs.data()),
-                     objs.size() * sizeof(T)),
-      seed);
+concept HasCarbonHashValue = requires(const T& value, uint64_t seed) {
+  { CarbonHashValue(value, seed) } -> std::same_as<HashCode>;
+};
+
+// C++ guarantees this is true for the unsigned variants, but we require it for
+// signed variants and pointers.
+static_assert(std::has_unique_object_representations_v<int8_t>);
+static_assert(std::has_unique_object_representations_v<int16_t>);
+static_assert(std::has_unique_object_representations_v<int32_t>);
+static_assert(std::has_unique_object_representations_v<int64_t>);
+static_assert(std::has_unique_object_representations_v<void*>);
+
+// Overloaded function to provide mappings or conversions for types that should
+// be hashed as plain data but whose storage we can't directly examine.
+//
+// For example, C++ uses `std::nullptr_t` but unfortunately doesn't make it have
+// a unique object representation. To address that, we need a function that
+// converts `nullptr` back into a `void*` that will have a unique object
+// representation. And this needs to be done by-value as we need to build a
+// temporary object to return, which requires a separate overload rather than
+// just using a type function that could be used in parallel in the predicate
+// below. Instead, we build the predicate independently of the mapping overload,
+// but together they should produce the correct result.
+template <typename T>
+inline auto MapToRawDataType(const T& value) -> const T& {
+  // This overload should never be selected for `std::nullptr_t`, so
+  // static_assert to get some better compiler error messages.
+  static_assert(!std::same_as<T, std::nullptr_t>);
+  return value;
 }
+inline auto MapToRawDataType(std::nullptr_t /*value*/) -> const void* {
+  return nullptr;
+}
+
+// Implementation detail predicate to detect if we can hash as a raw data type.
+// When used, it should be combined with our mapping function `MapToRawDataType`
+// to handle any necessary edge cases that don't directly work.
+template <typename T>
+concept CanHashAsRawDataType = std::same_as<T, std::nullptr_t> ||
+                               std::has_unique_object_representations_v<T>;
 
+// Implementation of the unqualified dispatch to any provided `CarbonHashValue`
+// overloads, either here or via ADL. Note that, similar to
+// `HasCarbonHashValue`, this must not be moved above any of those overloads.
 template <typename T>
 inline auto DispatchImpl(const T& value, uint64_t seed) -> HashCode {
-  // This unqualified call will find both the overloads in this namespace and
-  // ADL-found functions in an associated namespace of `T`.
-  return CarbonHashValue(value, seed);
+  // If we have an explicit overload for `CarbonHashValue`, call it. This may be
+  // provided above or via ADL, and is preferred as it represents an explicit
+  // request for how the type is hashed.
+  if constexpr (HasCarbonHashValue<T>) {
+    return CarbonHashValue(value, seed);
+  } else if constexpr (CanHashAsRawDataType<T>) {
+    // There was no explicit overload to call, but the type allows us to hash
+    // it as raw data, so do so.
+    Hasher hasher(seed);
+    hasher.HashRaw(MapToRawDataType(value));
+    return static_cast<HashCode>(hasher);
+  } else {
+    // We can only synthesize hashing for types that are hashable as raw data.
+    // This type isn't, so fail a static assert due to the lack of an overload.
+    // We use the concept here to try and get the best diagnostics we can about
+    // candidates.
+    static_assert(HasCarbonHashValue<T>,
+                  "Attempted to hash a type which does not have a "
+                  "`CarbonHashValue` overload.");
+  }
 }
 
 }  // namespace InternalHashDispatch
@@ -716,9 +795,101 @@ inline auto Hasher::ReadSmall(const T& value) -> uint64_t {
   }
 }
 
+template <typename... Ts>
+inline auto Hasher::Hash(const Ts&... values) -> void {
+  if constexpr (sizeof...(Ts) == 0) {
+    buffer ^= StaticRandomData[0];
+    return;
+  }
+
+  using InternalHashDispatch::CanHashAsRawDataType;
+  using InternalHashDispatch::HasCarbonHashValue;
+  using InternalHashDispatch::MapToRawDataType;
+
+  // Special-case a single value that we will hash as raw data.
+  if constexpr (sizeof...(Ts) == 1 && (... && (!HasCarbonHashValue<Ts> &&
+                                               CanHashAsRawDataType<Ts>))) {
+    HashRaw(MapToRawDataType(values)...);
+    return;
+  }
+
+  // Map each value into a uint64_t, either by hashing it with any custom hash
+  // function required, reading its data into a 64-bit value, or, if large,
+  // hashing it as raw data and using that hash code as the 64-bit data. This
+  // mirrors the logic in `InternalHashDispatch::DispatchImpl`, but minimizes
+  // early hashing of anything small we can just read as data. While this may be
+  // a little bit wasteful in some cases, collapsing down to a flat array of
+  // 64-bit integers is more efficient to hash.
+  auto map_value = []<typename T>(const T& value) -> uint64_t {
+    if constexpr (HasCarbonHashValue<T>) {
+      // Use the top-level `HashValue` to re-dispatch to the custom
+      // implementation with a fixed seed.
+      return static_cast<uint64_t>(HashValue(value));
+    } else if constexpr (CanHashAsRawDataType<T>) {
+      auto raw_value = MapToRawDataType(value);
+      if constexpr (sizeof(raw_value) <= 8) {
+        return ReadSmall(raw_value);
+      } else {
+        // Use the top-level `HashValue` to pick up a good fixed seed and hash
+        // this large object as raw data.
+        return static_cast<uint64_t>(HashValue(raw_value));
+      }
+    } else {
+      // We can only synthesize hashing for types that are hashable as raw data.
+      // This type isn't, so fail a static assert due to the lack of an overload.
+      // We use the concept here to try and get the best diagnostics we can
+      // about candidates.
+      static_assert(HasCarbonHashValue<T>,
+                    "Attempted to hash a type which does not have a "
+                    "`CarbonHashValue` overload.");
+    }
+  };
+  const uint64_t data[] = {map_value(values)...};
+  if constexpr (sizeof...(Ts) == 2) {
+    HashDense(data[0], data[1]);
+    return;
+  }
+
+  HashRaw(data);
+}
+
+template <typename T>
+inline auto Hasher::HashArray(llvm::ArrayRef<T> values) -> void {
+  using InternalHashDispatch::CanHashAsRawDataType;
+  using InternalHashDispatch::HasCarbonHashValue;
+
+  // This logic similarly mirrors `InternalHashDispatch::DispatchImpl`, but is
+  // specialized here to allow us to efficiently process the array when it
+  // *doesn't* require recursive hashing.
+  if constexpr (HasCarbonHashValue<T>) {
+    // Use a trivial loop to give consistent behavior for arrays requiring
+    // recursive hashing. This isn't terribly efficient, but if clients care
+    // they should specialize the entire hashing operation. For simple, tiny
+    // cases, this avoids an awkward functionality cliff.
+    for (const T& value : values) {
+      HashDense(static_cast<uint64_t>(HashValue(value)));
+    }
+    HashRaw(values.size());
+  } else if constexpr (std::has_unique_object_representations_v<T>) {
+    // This code is a narrow special case of `CanHashAsRawDataType` where we
+    // can further hash the underlying storage directly. We check that it is a
+    // subset.
+    static_assert(CanHashAsRawDataType<T>);
+    HashSizedBytes(values);
+  } else {
+    // We can only synthesize hashing for types that are hashable as raw data.
+    // This type isn't, so fail a static assert due to the lack of an overload.
+    // We use the concept here to try and get the best diagnostics we can
+    // about candidates.
+    static_assert(HasCarbonHashValue<T>,
+                  "Attempted to hash a type which does not have a "
+                  "`CarbonHashValue` overload.");
+  }
+}
+
 template <typename T>
   requires std::has_unique_object_representations_v<T>
-inline auto Hasher::Hash(const T& value) -> void {
+inline auto Hasher::HashRaw(const T& value) -> void {
   if constexpr (sizeof(T) <= 8) {
     // For types size 8-bytes and smaller directly being hashed (as opposed to
     // 8-bytes potentially bit-packed with data), we rarely expect the incoming
@@ -759,37 +930,6 @@ inline auto Hasher::Hash(const T& value) -> void {
   HashSizedBytesLarge(llvm::ArrayRef<std::byte>(data_ptr, sizeof(T)));
 }
 
-template <typename... Ts>
-  requires(... && std::has_unique_object_representations_v<Ts>)
-inline auto Hasher::Hash(const Ts&... value) -> void {
-  if constexpr (sizeof...(Ts) == 0) {
-    buffer ^= StaticRandomData[0];
-    return;
-  }
-  if constexpr (sizeof...(Ts) == 1) {
-    Hash(value...);
-    return;
-  }
-  if constexpr ((... && (sizeof(Ts) <= 8))) {
-    if constexpr (sizeof...(Ts) == 2) {
-      HashDense(ReadSmall(value)...);
-      return;
-    }
-
-    // More than two, but all small -- read each one into a contiguous buffer of
-    // data. This may be a bit memory wasteful by padding everything out to
-    // 8-byte chunks, but for that regularity the hashing is likely faster.
-    const uint64_t data[] = {ReadSmall(value)...};
-    Hash(data);
-    return;
-  }
-
-  // For larger objects, hash each one down to a hash code and then hash those
-  // as a buffer.
-  const uint64_t data[] = {static_cast<uint64_t>(HashValue(value))...};
-  Hash(data);
-}
-
 inline auto Hasher::HashSizedBytes(llvm::ArrayRef<std::byte> bytes) -> void {
   const std::byte* data_ptr = bytes.data();
   const ssize_t size = bytes.size();

+ 160 - 6
common/hashing_test.cpp

@@ -8,6 +8,7 @@
 #include <gtest/gtest.h>
 
 #include <concepts>
+#include <type_traits>
 
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/StringExtras.h"
@@ -283,20 +284,91 @@ TEST(HashingTest, BasicStrings) {
   }
 }
 
+TEST(HashingTest, ArrayLike) {
+  int c_array[] = {1, 2, 3, 4};
+  llvm::ArrayRef arr = c_array;
+  EXPECT_THAT(HashValue(c_array), Eq(HashValue(arr)));
+  EXPECT_THAT(HashValue(std::array{1, 2, 3, 4}), Eq(HashValue(arr)));
+  EXPECT_THAT(HashValue(std::vector{1, 2, 3, 4}), Eq(HashValue(arr)));
+  EXPECT_THAT(HashValue(llvm::SmallVector<int>{1, 2, 3, 4}),
+              Eq(HashValue(arr)));
+}
+
+TEST(HashingTest, HashAPInt) {
+  // The bit width should be hashed as well as the value.
+  llvm::APInt one_64(/*numBits=*/64, /*val=*/1);
+  llvm::APInt two_64(/*numBits=*/64, /*val=*/2);
+  llvm::APInt one_128(/*numBits=*/128, /*val=*/1);
+  llvm::APInt two_128(/*numBits=*/128, /*val=*/2);
+
+  std::array array = {one_64, two_64, one_128, two_128};
+  for (int i : llvm::seq<int>(array.size())) {
+    EXPECT_THAT(HashValue(array[i]), Eq(HashValue(array[i])));
+
+    for (int j : llvm::seq<int>(i + 1, array.size())) {
+      EXPECT_THAT(HashValue(array[i]), Ne(HashValue(array[j])))
+          << "Hashing #" << i << " and #" << j;
+    }
+  }
+}
+
+TEST(HashingTest, HashAPFloat) {
+  // Hashtable equality for `APFloat` uses a bitwise comparison. This
+  // differentiates between various things that would otherwise not make sense:
+  // - Different floating point semantics
+  // - `-0.0` and `0.0`
+  //
+  // It also allows NaNs to be compared meaningfully.
+  llvm::APFloat zero_float =
+      llvm::APFloat::getZero(llvm::APFloat::IEEEsingle());
+  llvm::APFloat neg_zero_float =
+      llvm::APFloat::getZero(llvm::APFloat::IEEEsingle(), /*Negative=*/true);
+  llvm::APFloat zero_double =
+      llvm::APFloat::getZero(llvm::APFloat::IEEEdouble());
+  llvm::APFloat zero_bfloat = llvm::APFloat::getZero(llvm::APFloat::BFloat());
+  llvm::APFloat one_float = llvm::APFloat::getOne(llvm::APFloat::IEEEsingle());
+  llvm::APFloat inf_float = llvm::APFloat::getInf(llvm::APFloat::IEEEsingle());
+  llvm::APFloat nan_0_float = llvm::APFloat::getNaN(
+      llvm::APFloat::IEEEsingle(), /*Negative=*/false, /*payload=*/0);
+  llvm::APFloat nan_42_float = llvm::APFloat::getNaN(
+      llvm::APFloat::IEEEsingle(), /*Negative=*/false, /*payload=*/42);
+
+  std::array array = {zero_float, neg_zero_float, zero_double, zero_bfloat,
+                      one_float,  inf_float,      nan_42_float};
+  for (int i : llvm::seq<int>(array.size())) {
+    EXPECT_THAT(HashValue(array[i]), Eq(HashValue(array[i])));
+
+    for (int j : llvm::seq<int>(i + 1, array.size())) {
+      EXPECT_THAT(HashValue(array[i]), Ne(HashValue(array[j])))
+          << "Hashing #" << i << " and #" << j;
+    }
+  }
+
+  // Note that currently we use LLVM's hashing of `APFloat` which does *not*
+  // hash the payload of NaNs.
+  EXPECT_THAT(HashValue(nan_0_float), Eq(HashValue(nan_42_float)));
+}
+
+// A type that has hashing customization. However, it is also crafted to be
+// small and to have a unique object representation. This helps ensure that
+// when a user provides custom hashing, it is reliably used.
 struct HashableType {
-  int x;
-  int y;
+  int8_t x;
+  int8_t y;
 
-  int ignored = 0;
+  int16_t ignored = 0;
 
-  // See common/hashing.h.
-  friend auto CarbonHashValue(const HashableType& value, uint64_t seed)
-      -> HashCode {
+  // Provide the hashing but try to craft a relatively low-ranking overload to
+  // help ensure that the hashing framework doesn't accidentally override this.
+  template <typename T>
+    requires(std::same_as<T, HashableType>)
+  friend auto CarbonHashValue(const T& value, uint64_t seed) -> HashCode {
     Hasher hasher(seed);
     hasher.Hash(value.x, value.y);
     return static_cast<HashCode>(hasher);
   }
 };
+static_assert(std::has_unique_object_representations_v<HashableType>);
 
 TEST(HashingTest, CustomType) {
   HashableType a = {.x = 1, .y = 2};
@@ -310,6 +382,88 @@ TEST(HashingTest, CustomType) {
   EXPECT_THAT(HashValue(c), Eq(HashValue(b)));
 }
 
+TEST(HashingTest, ArrayRecursion) {
+  // Make sure we correctly recurse when hashing an array and don't try to use
+  // the object representation.
+  llvm::APInt one_64(/*numBits=*/64, /*val=*/1);
+  llvm::APInt two_64(/*numBits=*/64, /*val=*/2);
+  llvm::APInt one_128(/*numBits=*/128, /*val=*/1);
+  llvm::APInt two_128(/*numBits=*/128, /*val=*/2);
+  std::array apint_array = {one_64, two_64, one_128, two_128};
+  EXPECT_THAT(HashValue(apint_array),
+              Eq(HashValue(std::array{one_64, two_64, one_128, two_128})));
+  EXPECT_THAT(HashValue(apint_array),
+              Ne(HashValue(std::array{one_64, two_64, two_128, one_128})));
+  EXPECT_THAT(HashValue(apint_array),
+              Ne(HashValue(std::array{one_64, two_64, one_64, two_128})));
+  EXPECT_THAT(HashValue(apint_array),
+              Ne(HashValue(std::array{one_64, two_128, one_128, two_128})));
+  EXPECT_THAT(HashValue(apint_array),
+              Ne(HashValue(std::array{one_64, two_64, one_128})));
+  EXPECT_THAT(
+      HashValue(apint_array),
+      Ne(HashValue(std::array{one_64, two_64, one_128, two_128, two_128})));
+
+  // Also test for a custom type that still *looks* like plain data.
+  HashableType a = {.x = 1, .y = 2};
+  HashableType b = {.x = 3, .y = 4};
+  HashableType c = {.x = 3, .y = 4, .ignored = 42};
+  std::array custom_array = {a, b, c, a};
+  EXPECT_THAT(HashValue(custom_array), Eq(HashValue(std::array{a, b, c, a})));
+  EXPECT_THAT(HashValue(custom_array), Eq(HashValue(std::array{a, b, b, a})));
+  EXPECT_THAT(HashValue(custom_array), Ne(HashValue(std::array{a, b, c, b})));
+  EXPECT_THAT(HashValue(custom_array), Ne(HashValue(std::array{a, b, a, c})));
+  EXPECT_THAT(HashValue(custom_array), Ne(HashValue(std::array{a, b, c})));
+  EXPECT_THAT(HashValue(custom_array),
+              Ne(HashValue(std::array{a, b, c, a, a})));
+}
+
+TEST(HashingTest, TupleRecursion) {
+  // Make sure we can hash pairs and tuples which require us to recurse for each
+  // element rather than treating the whole object as raw storage.
+
+  // We can use APInt values to help test this.
+  llvm::APInt one_64(/*numBits=*/64, /*val=*/1);
+  llvm::APInt two_64(/*numBits=*/64, /*val=*/2);
+  llvm::APInt one_128(/*numBits=*/128, /*val=*/1);
+  llvm::APInt two_128(/*numBits=*/128, /*val=*/2);
+  EXPECT_THAT(HashValue(std::pair{one_64, one_128}),
+              Eq(HashValue(std::pair{one_64, one_128})));
+  EXPECT_THAT(HashValue(std::pair{one_64, one_128}),
+              Ne(HashValue(std::pair{one_64, two_64})));
+  EXPECT_THAT(HashValue(std::pair{one_64, one_128}),
+              Ne(HashValue(std::pair{one_64, one_64})));
+  EXPECT_THAT(HashValue(std::pair{one_64, one_128}),
+              Ne(HashValue(std::pair{one_128, one_64})));
+  EXPECT_THAT(HashValue(std::tuple{one_64, one_128, two_64}),
+              Eq(HashValue(std::tuple{one_64, one_128, two_64})));
+  EXPECT_THAT(HashValue(std::tuple{one_64, one_128, two_64}),
+              Ne(HashValue(std::tuple{one_64, two_64, two_64})));
+  EXPECT_THAT(HashValue(std::tuple{one_64, one_128, two_64}),
+              Ne(HashValue(std::tuple{one_64, one_64, two_64})));
+  EXPECT_THAT(HashValue(std::tuple{one_64, one_128, two_64}),
+              Ne(HashValue(std::tuple{one_64, two_64, one_128})));
+  EXPECT_THAT(HashValue(std::tuple{one_64, one_128, two_64}),
+              Ne(HashValue(std::tuple{one_64, one_128})));
+
+  // Also test for a custom type that still *looks* like plain data.
+  HashableType a = {.x = 1, .y = 2};
+  HashableType b = {.x = 3, .y = 4};
+  HashableType c = {.x = 3, .y = 4, .ignored = 42};
+  EXPECT_THAT(HashValue(std::pair{a, b}), Eq(HashValue(std::pair{a, b})));
+  EXPECT_THAT(HashValue(std::pair{a, b}), Ne(HashValue(std::pair{a, a})));
+  EXPECT_THAT(HashValue(std::pair{a, b}), Ne(HashValue(std::pair{b, a})));
+  EXPECT_THAT(HashValue(std::pair{a, b}), Eq(HashValue(std::pair{a, c})));
+  EXPECT_THAT(HashValue(std::tuple{a, b, a}),
+              Eq(HashValue(std::tuple{a, b, a})));
+  EXPECT_THAT(HashValue(std::tuple{a, b, a}),
+              Ne(HashValue(std::tuple{a, b, b})));
+  EXPECT_THAT(HashValue(std::tuple{a, b, a}),
+              Ne(HashValue(std::tuple{a, a, a})));
+  EXPECT_THAT(HashValue(std::tuple{a, b, a}),
+              Eq(HashValue(std::tuple{a, c, a})));
+}
+
 // The only significantly bad seed is zero, so pick a non-zero seed with a tiny
 // amount of entropy to make sure that none of the testing relies on the entropy
 // from this.

+ 2 - 2
common/raw_hashtable_test_helpers.h

@@ -51,8 +51,8 @@ struct TestKeyContext : DefaultKeyContext {
   auto HashKey(const KeyT& key, uint64_t seed) const -> HashCode {
     Hasher hash(seed);
     // Inject some other data to the hash.
-    hash.Hash(42);
-    hash.Hash(HashValue(key));
+    hash.HashRaw(42);
+    hash.HashRaw(HashValue(key));
     return static_cast<HashCode>(hash);
   }
 };

+ 1 - 1
toolchain/sem_ir/bind_name.h

@@ -29,7 +29,7 @@ struct BindNameInfo : public Printable<BindNameInfo> {
 inline auto CarbonHashValue(const BindNameInfo& value, uint64_t seed)
     -> HashCode {
   Hasher hasher(seed);
-  hasher.Hash(value);
+  hasher.HashRaw(value);
   return static_cast<HashCode>(hasher);
 }
 

+ 1 - 1
toolchain/sem_ir/inst.h

@@ -458,7 +458,7 @@ class InstBlockStore : public BlockValueStore<InstBlockId> {
 // See common/hashing.h.
 inline auto CarbonHashValue(const Inst& value, uint64_t seed) -> HashCode {
   Hasher hasher(seed);
-  hasher.Hash(value);
+  hasher.HashRaw(value);
   return static_cast<HashCode>(hasher);
 }