Browse Source

Simplify the index & tag extraction API for hash codes. (#3627)

The fancier API ended up not being helpful and making it harder to
optimize a hash table implemented on top of this.
Chandler Carruth 2 years ago
parent
commit
7e9760d9e4
2 changed files with 29 additions and 55 deletions
  1. 16 20
      common/hashing.h
  2. 13 35
      common/hashing_test.cpp

+ 16 - 20
common/hashing.h

@@ -44,23 +44,24 @@ class HashCode : public Printable<HashCode> {
     return lhs.value_ != rhs.value_;
   }
 
-  // Extracts an index from the hash code that is in the range [0, size). The
-  // size and returned index are `ssize_t` for performance reasons. This is
-  // useful when using the hash code to index a hash table. It prioritizes
-  // computing the index from the bits in the hash code with the highest
-  // entropy.
-  constexpr auto ExtractIndex(ssize_t size) -> ssize_t;
+  // Extracts an index from the hash code as a `ssize_t`. This index covers the
+  // full range of that type, and may even be negative. Typical usage will
+  // involve masking this down to some positive range using a bitand with a mask
+  // computed from a power-of-two size. This routine doesn't do any masking to
+  // ensure a positive index to avoid redundant computations with the typical
+  // user of the index.
+  constexpr auto ExtractIndex() -> ssize_t;
 
   // Extracts an index and a fixed `N`-bit tag from the hash code.
   //
-  // This will both minimize overlap between the tag and the index as well as
-  // maximizing the entropy of the bits that contribute to each.
+  // This extracts these values from the position of the hash code which
+  // maximizes the entropy in the tag and the low bits of the index, as typical
+  // indices will be further masked down to fall in a smaller range.
   //
-  // The index will be in the range [0, `size`). The `size` must be a power of
-  // two, and `N` must be in the range [1, 32].
+  // `N` must be in the range [1, 32]. The returned index will be in the range
+  // [0, 2**(64-N)).
   template <int N>
-  constexpr auto ExtractIndexAndTag(ssize_t size)
-      -> std::pair<ssize_t, uint32_t>;
+  constexpr auto ExtractIndexAndTag() -> std::pair<ssize_t, uint32_t>;
 
   // Extract the full 64-bit hash code as an integer.
   //
@@ -534,19 +535,14 @@ inline auto HashValue(const T& value) -> HashCode {
   return HashValue(value, Hasher::StaticRandomData[7]);
 }
 
-inline constexpr auto HashCode::ExtractIndex(ssize_t size) -> ssize_t {
-  CARBON_DCHECK(llvm::isPowerOf2_64(size));
-  return value_ & (size - 1);
-}
+inline constexpr auto HashCode::ExtractIndex() -> ssize_t { return value_; }
 
 template <int N>
-inline constexpr auto HashCode::ExtractIndexAndTag(ssize_t size)
+inline constexpr auto HashCode::ExtractIndexAndTag()
     -> std::pair<ssize_t, uint32_t> {
   static_assert(N >= 1);
   static_assert(N <= 32);
-  CARBON_DCHECK(llvm::isPowerOf2_64(size));
-  CARBON_DCHECK(1LL << (64 - N) >= size) << "Not enough bits for size and tag!";
-  return {static_cast<ssize_t>((value_ >> N) & (size - 1)),
+  return {static_cast<ssize_t>(value_ >> N),
           static_cast<uint32_t>(value_ & ((1U << (N + 1)) - 1))};
 }
 

+ 13 - 35
common/hashing_test.cpp

@@ -35,44 +35,22 @@ TEST(HashingTest, HashCodeAPI) {
 
   // Exercise the methods in basic ways across a few sizes. This doesn't check
   // much beyond stability across re-computed values, crashing, or hitting UB.
-  EXPECT_THAT(HashValue("a").ExtractIndex(2), Eq(a.ExtractIndex(2)));
-  EXPECT_THAT(HashValue("a").ExtractIndex(4), Eq(a.ExtractIndex(4)));
-  EXPECT_THAT(HashValue("a").ExtractIndex(8), Eq(a.ExtractIndex(8)));
-  EXPECT_THAT(HashValue("a").ExtractIndex(1 << 10),
-              Eq(a.ExtractIndex(1 << 10)));
-  EXPECT_THAT(HashValue("a").ExtractIndex(1 << 20),
-              Eq(a.ExtractIndex(1 << 20)));
-  EXPECT_THAT(HashValue("a").ExtractIndex(1 << 30),
-              Eq(a.ExtractIndex(1 << 30)));
-  EXPECT_THAT(HashValue("a").ExtractIndex(1LL << 40),
-              Eq(a.ExtractIndex(1LL << 40)));
-  EXPECT_THAT(HashValue("a").ExtractIndex(1LL << 50),
-              Eq(a.ExtractIndex(1LL << 50)));
-
-  EXPECT_THAT(a.ExtractIndex(8), Ne(b.ExtractIndex(8)));
-  EXPECT_THAT(a.ExtractIndex(8), Ne(empty.ExtractIndex(8)));
+  EXPECT_THAT(HashValue("a").ExtractIndex(), Eq(a.ExtractIndex()));
+
+  EXPECT_THAT(a.ExtractIndex(), Ne(b.ExtractIndex()));
+  EXPECT_THAT(a.ExtractIndex(), Ne(empty.ExtractIndex()));
 
   // Note that the index produced with a tag may be different from the index
   // alone!
-  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<2>(2),
-              Eq(a.ExtractIndexAndTag<2>(2)));
-  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<16>(4),
-              Eq(a.ExtractIndexAndTag<16>(4)));
-  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(8),
-              Eq(a.ExtractIndexAndTag<7>(8)));
-  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1 << 10),
-              Eq(a.ExtractIndexAndTag<7>(1 << 10)));
-  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1 << 20),
-              Eq(a.ExtractIndexAndTag<7>(1 << 20)));
-  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1 << 30),
-              Eq(a.ExtractIndexAndTag<7>(1 << 30)));
-  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1LL << 40),
-              Eq(a.ExtractIndexAndTag<7>(1LL << 40)));
-  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(1LL << 50),
-              Eq(a.ExtractIndexAndTag<7>(1LL << 50)));
-
-  const auto [a_index, a_tag] = a.ExtractIndexAndTag<4>(8);
-  const auto [b_index, b_tag] = b.ExtractIndexAndTag<4>(8);
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<2>(),
+              Eq(a.ExtractIndexAndTag<2>()));
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<16>(),
+              Eq(a.ExtractIndexAndTag<16>()));
+  EXPECT_THAT(HashValue("a").ExtractIndexAndTag<7>(),
+              Eq(a.ExtractIndexAndTag<7>()));
+
+  const auto [a_index, a_tag] = a.ExtractIndexAndTag<4>();
+  const auto [b_index, b_tag] = b.ExtractIndexAndTag<4>();
   EXPECT_THAT(a_index, Ne(b_index));
   EXPECT_THAT(a_tag, Ne(b_tag));
 }