소스 검색

Refactor ValueStoreChunk and ValueStoreRange into ValueStore (#5756)

ValueStoreChunk and ValueStoreRange are implemented in a way that's
closely tied to ValueStore, and the separation makes for a lot of
additional template parameter passing, which seems easy to make mistakes
on. Combine types in order to make the close association more implicit.

I also considered passing `ValueT` everywhere as an additional template
parameter, but I believe this simplification is important. I'll
highlight five notes that I think favor this approach:

- `ValueStore`, with the chunk type in the same file, now has more of
the closely related implementation features in the same file. I think we
probably will want any chunking to continue to be done by `ValueStore`
itself, with related types using the implementation on `ValueStore` and
never creating their own.
- Making `ValueT` a template parameter on `ValueStore` -- my next step
-- will only change a couple lines of code on this type, instead of
sweeping changes. That should make it easier to be confident of the
correctness of those changes.
- Simpler to verify correctness. For example, `ValueStoreChunk` takes a
`ValueType` parameter that it doesn't forward; other functions assume
they can use `IdT::ValueType`. With the changes, there is also no longer
any benefit to a separate `IdHasValueType`, which was inconsistently
applied to related types (e.g., `ValueStoreRange` didn't use it).
- Template parameters often lead to `sizeof`, where we can't rely on
type checking to catch mistakes.
- The reduction of code is significant, with 8 `template<...>` removed
(including 1 forward declaration for `ValueStoreRange`), and also the
related `requires`. Correspondingly, places specifying template
parameters also decreased.
Jon Ross-Perkins 10 달 전
부모
커밋
839a7b7c96
7개의 변경된 파일, 168개의 추가작업 그리고 220개의 삭제
  1. 1 4
      toolchain/base/BUILD
  2. 1 1
      toolchain/base/canonical_value_store.h
  3. 162 48
      toolchain/base/value_store.h
  4. 0 164
      toolchain/base/value_store_chunk.h
  5. 2 1
      toolchain/sem_ir/generic.h
  6. 1 1
      toolchain/sem_ir/impl.h
  7. 1 1
      toolchain/sem_ir/inst.h

+ 1 - 4
toolchain/base/BUILD

@@ -124,10 +124,7 @@ cc_library(
 
 cc_library(
     name = "value_store",
-    hdrs = [
-        "value_store.h",
-        "value_store_chunk.h",
-    ],
+    hdrs = ["value_store.h"],
     deps = [
         ":mem_usage",
         ":value_store_types",

+ 1 - 1
toolchain/base/canonical_value_store.h

@@ -48,7 +48,7 @@ class CanonicalValueStore {
     return values_.OutputYaml();
   }
 
-  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<IdT> {
+  auto values() const [[clang::lifetimebound]] -> ValueStore<IdT>::Range {
     return values_.values();
   }
   auto size() const -> size_t { return values_.size(); }

+ 162 - 48
toolchain/base/value_store.h

@@ -5,6 +5,8 @@
 #ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
 #define CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
 
+#include <bit>
+#include <cstddef>
 #include <limits>
 #include <type_traits>
 #include <utility>
@@ -14,8 +16,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MemAlloc.h"
 #include "toolchain/base/mem_usage.h"
-#include "toolchain/base/value_store_chunk.h"
 #include "toolchain/base/value_store_types.h"
 #include "toolchain/base/yaml.h"
 
@@ -29,15 +31,12 @@ class ValueStoreNotPrintable {};
 
 }  // namespace Internal
 
-template <class IdT>
-class ValueStoreRange;
-
 // A simple wrapper for accumulating values, providing IDs to later retrieve the
 // value. This does not do deduplication.
 //
 // IdT::ValueType must represent the type being indexed.
 template <typename IdT>
-  requires(Internal::IdHasValueType<IdT>)
+  requires(requires { typename IdT::ValueType; })
 class ValueStore
     : public std::conditional<
           std::is_base_of_v<Printable<typename IdT::ValueType>,
@@ -49,6 +48,34 @@ class ValueStore
   using RefType = ValueStoreTypes<IdT>::RefType;
   using ConstRefType = ValueStoreTypes<IdT>::ConstRefType;
 
+  // A range over references to the values in a ValueStore, returned from
+  // `ValueStore::values()`. Hides the complex type name of the iterator
+  // internally to provide a type name (`Range`) that can be
+  // referred to without auto and templates.
+  class Range {
+   public:
+    explicit Range(const ValueStore& store [[clang::lifetimebound]])
+        : flattened_range_(MakeFlattenedRange(store)) {}
+
+    auto begin() const -> auto { return flattened_range_.begin(); }
+    auto end() const -> auto { return flattened_range_.end(); }
+
+   private:
+    // Flattens the range of `Chunk`s of `ValueType`s into a single
+    // range of `ValueType`s.
+    static auto MakeFlattenedRange(const ValueStore& store) -> auto {
+      // Because indices into `ValueStore` are all sequential values from 0, we
+      // can use llvm::seq to walk all indices in the store.
+      return llvm::map_range(
+          llvm::seq(store.size_),
+          [&](int32_t i) -> ConstRefType { return store.Get(IdT(i)); });
+    }
+
+    using FlattenedRangeType =
+        decltype(MakeFlattenedRange(std::declval<const ValueStore&>()));
+    FlattenedRangeType flattened_range_;
+  };
+
   ValueStore() = default;
 
   // Stores the value and returns an ID to reference it.
@@ -59,7 +86,7 @@ class ValueStore
     CARBON_DCHECK(size_ < std::numeric_limits<int32_t>::max(), "Id overflow");
 
     IdT id(size_);
-    auto [chunk_index, pos] = Internal::IdToChunkIndices(id);
+    auto [chunk_index, pos] = IdToChunkIndices(id);
     ++size_;
 
     CARBON_DCHECK(static_cast<size_t>(chunk_index) <= chunks_.size(),
@@ -77,7 +104,7 @@ class ValueStore
   auto Get(IdT id) -> RefType {
     CARBON_DCHECK(id.index >= 0, "{0}", id);
     CARBON_DCHECK(id.index < size_, "{0}", id);
-    auto [chunk_index, pos] = Internal::IdToChunkIndices(id);
+    auto [chunk_index, pos] = IdToChunkIndices(id);
     return chunks_[chunk_index].at(pos);
   }
 
@@ -85,7 +112,7 @@ class ValueStore
   auto Get(IdT id) const -> ConstRefType {
     CARBON_DCHECK(id.index >= 0, "{0}", id);
     CARBON_DCHECK(id.index < size_, "{0}", id);
-    auto [chunk_index, pos] = Internal::IdToChunkIndices(id);
+    auto [chunk_index, pos] = IdToChunkIndices(id);
     return chunks_[chunk_index].at(pos);
   }
 
@@ -93,8 +120,7 @@ class ValueStore
   auto Reserve(size_t size) -> void {
     // We get the number of chunks needed to satisfy `size` by rounding any
     // partial result up.
-    size_t num_more_chunks =
-        (size + ChunkType::Capacity - 1) / ChunkType::Capacity;
+    size_t num_more_chunks = (size + Chunk::Capacity() - 1) / Chunk::Capacity();
     if (chunks_.size() < num_more_chunks) {
       // We resize() rather than reserve() here to create the new `ChunkType`
       // objects, which will in turn allocate space for values in those chunks
@@ -116,15 +142,13 @@ class ValueStore
   auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
       -> void {
     mem_usage.Add(label.str(), size_ * sizeof(ValueType),
-                  ChunkType::CapacityBytes * chunks_.size());
+                  Chunk::CapacityBytes * chunks_.size());
   }
 
   auto size() const -> size_t { return size_; }
 
   // Makes an iterable range over references to all values in the ValueStore.
-  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<IdT> {
-    return ValueStoreRange<IdT>(*this);
-  }
+  auto values() const [[clang::lifetimebound]] -> Range { return Range(*this); }
 
   // Makes an iterable range over pairs of the index and a reference to the
   // value for each value in the store.
@@ -148,9 +172,130 @@ class ValueStore
   }
 
  private:
-  friend class ValueStoreRange<IdT>;
+  // A chunk of `ValueType`s which has a fixed capacity, but variable size.
+  // Tracks the size internally for verifying bounds.
+  struct Chunk {
+   public:
+    // The max size of each chunk allocation for `ValueStore`. This is based on
+    // TLB page sizes for the target platform.
+    //
+    // See https://docs.kernel.org/admin-guide/mm/hugetlbpage.html
+    //
+    // A 4K chunk size outperforms a 1M chunk size on Linux-x64 and MacOS-arm64
+    // in benchmarks and when running file_test.
+    //
+    // Linux-x64: x64 CPUs support 4K and 2M page sizes, but we see 1M is slower
+    // than 4K with tcmalloc in opt builds for our tests.
+    //
+    // Mac-arm64: arm64 CPUs support 4K, 8K, 64K, 256K, 1M, 4M and up. Like for
+    // Linux-x64, 4K outperformed 1M. We didn't try other sizes yet.
+    //
+    // TODO: Is there a more optimize size for Mac-arm64? What should
+    // Linux-arm64 and Mac-x64 use? What should Windows use?
+    //
+    // TODO: The previous SmallVector<ValueType> seems to outperform 4K chunks
+    // (they may be slower by up to 5%) in benchmarks. Find ways to make
+    // chunking faster. Should successive chunks get larger in size? That will
+    // greatly complicate math for choosing a chunk though.
+    static constexpr auto MaxAllocationBytes() -> int32_t {
+#if !defined(NDEBUG) || LLVM_ADDRESS_SANITIZER_BUILD
+      // Use a small size in unoptimized builds to ensure multiple chunks get
+      // used. And do the same in ASAN builds to reduce bookkeeping overheads.
+      // Using large allocations (e.g. 1M+) incurs a 10x runtime cost for our
+      // tests under ASAN.
+      return sizeof(ValueType) * 5;
+#else
+      return 4 * 1024;
+#endif
+    }
+
+    // The number of elements stored in each chunk allocation.
+    //
+    // The number must be a power of two so that that there are no unused values
+    // in bits indexing into the allocation.
+    static constexpr auto Capacity() -> int32_t {
+      constexpr auto MaxElements = MaxAllocationBytes() / sizeof(ValueType);
+      return std::bit_floor(MaxElements);
+    }
+
+    // The number of bits needed to index each element in a chunk allocation.
+    static constexpr auto IndexBits() -> int32_t {
+      static_assert(Capacity() > 0);
+      return std::bit_width(uint32_t{Capacity() - 1});
+    }
+
+    static constexpr auto CapacityBytes = Capacity() * sizeof(ValueType);
+
+    explicit Chunk()
+        : buf_(reinterpret_cast<ValueType*>(
+              llvm::allocate_buffer(CapacityBytes, alignof(ValueType)))) {}
+
+    // Moving leaves nullptr behind in the moved-from object so that the
+    // destructor is a no-op (preventing double free).
+    Chunk(Chunk&& rhs) noexcept
+        : buf_(std::exchange(rhs.buf_, nullptr)), num_(rhs.num_) {}
+
+    auto operator=(Chunk&& rhs) noexcept -> Chunk& {
+      buf_ = std::exchange(rhs.buf_, nullptr);
+      num_ = rhs.num_;
+      return *this;
+    }
+
+    ~Chunk() {
+      if (buf_) {
+        if constexpr (!std::is_trivially_destructible_v<ValueType>) {
+          std::destroy_n(buf_, num_);
+        }
+        llvm::deallocate_buffer(buf_, CapacityBytes, alignof(ValueType));
+      }
+    }
 
-  using ChunkType = Internal::ValueStoreChunk<IdT, ValueType>;
+    auto at(int32_t i) -> ValueType& {
+      CARBON_CHECK(i < num_, "{0}", i);
+      return buf_[i];
+    }
+    auto at(int32_t i) const -> const ValueType& {
+      CARBON_CHECK(i < num_, "{0}", i);
+      return buf_[i];
+    }
+
+    auto push(ValueType&& value) -> void {
+      CARBON_CHECK(num_ < Capacity());
+      std::construct_at(buf_ + num_, std::move(value));
+      ++num_;
+    }
+
+    auto size() const -> int32_t { return num_; }
+
+   private:
+    // Verify using an `int32_t` for `num_` is sound.
+    static_assert(Capacity() <= std::numeric_limits<int32_t>::max());
+
+    ValueType* buf_;
+    int32_t num_ = 0;
+  };
+
+  // Converts an id into an index into the set of chunks, and an offset into
+  // that specific chunk. Looks for index overflow in non-optimized builds.
+  static auto IdToChunkIndices(IdT id) -> std::pair<int32_t, int32_t> {
+    constexpr auto LowBits = Chunk::IndexBits();
+
+    // Verify there are no unused bits when indexing up to the `Capacity`. This
+    // ensures that ids are contiguous values from 0, as if the values were all
+    // stored in a single array, and allows using the ids to index into other
+    // arrays.
+    static_assert((1 << LowBits) == Chunk::Capacity());
+    // Simple check to make sure nothing went wildly wrong with the `Capacity`,
+    // and we have some room for a chunk index, and that shifting by the number
+    // of bits won't be UB in an int32_t.
+    static_assert(LowBits < 30);
+
+    // The index of the chunk is the high bits.
+    auto chunk = id.index >> LowBits;
+    // The index into the chunk is the low bits.
+    auto pos = id.index & ((1 << LowBits) - 1);
+    return {chunk, pos};
+  }
 
   // Number of elements added to the store. The number should never exceed what
   // fits in an `int32_t`, which is checked in non-optimized builds in Add().
@@ -163,38 +308,7 @@ class ValueStore
   // about invalidation and use-after-free. We ensure at least one Chunk is held
   // inline so that in the case where there is only a single Chunk (i.e. small
   // files) we can avoid one indirection.
-  llvm::SmallVector<ChunkType, 1> chunks_;
-};
-
-// A range over references to the values in a ValueStore, returned from
-// `ValueStore::values()`. Hides the complex type name of the iterator
-// internally to provide a type name (`ValueStoreRange<IdT>`) that can be
-// referred to without auto and templates.
-template <class IdT>
-class ValueStoreRange {
- public:
-  explicit ValueStoreRange(const ValueStore<IdT>& store
-                           [[clang::lifetimebound]])
-      : flattened_range_(MakeFlattenedRange(store)) {}
-
-  auto begin() const -> auto { return flattened_range_.begin(); }
-  auto end() const -> auto { return flattened_range_.end(); }
-
- private:
-  // Flattens the range of `ValueStoreChunk`s of `ValueType`s into a single
-  // range of `ValueType`s.
-  static auto MakeFlattenedRange(const ValueStore<IdT>& store) -> auto {
-    // Because indices into `ValueStore` are all sequential values from 0, we
-    // can use llvm::seq to walk all indices in the store.
-    return llvm::map_range(llvm::seq(store.size_),
-                           [&](int32_t i) -> ValueStore<IdT>::ConstRefType {
-                             return store.Get(IdT(i));
-                           });
-  }
-
-  using FlattenedRangeType =
-      decltype(MakeFlattenedRange(std::declval<const ValueStore<IdT>&>()));
-  FlattenedRangeType flattened_range_;
+  llvm::SmallVector<Chunk, 1> chunks_;
 };
 
 }  // namespace Carbon

+ 0 - 164
toolchain/base/value_store_chunk.h

@@ -1,164 +0,0 @@
-// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
-// Exceptions. See /LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_CHUNK_H_
-#define CARBON_TOOLCHAIN_BASE_VALUE_STORE_CHUNK_H_
-
-#include <bit>
-#include <cstddef>
-#include <limits>
-#include <memory>
-#include <type_traits>
-#include <utility>
-
-#include "common/check.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/MemAlloc.h"
-#include "toolchain/base/mem_usage.h"
-
-namespace Carbon::Internal {
-
-// Ids which are stored in a `ValueStore` have a `ValueType` which indicates the
-// type of value held in the `ValueStore`.
-template <class IdT>
-concept IdHasValueType = requires { typename IdT::ValueType; };
-
-// The max size of each chunk allocation for `ValueStore`. This is based on TLB
-// page sizes for the target platform.
-//
-// See https://docs.kernel.org/admin-guide/mm/hugetlbpage.html
-//
-// A 4K chunk size outperforms a 1M chunk size on Linux-x64 and MacOS-arm64 in
-// benchmarks and when running file_test.
-//
-// Linux-x64: x64 CPUs support 4K and 2M page sizes, but we see 1M is slower
-// than 4K with tcmalloc in opt builds for our tests.
-//
-// Mac-arm64: arm64 CPUs support 4K, 8K, 64K, 256K, 1M, 4M and up. Like for
-// Linux-x64, 4K outperformed 1M. We didn't try other sizes yet.
-//
-// TODO: Is there a more optimize size for Mac-arm64? What should Linux-arm64
-// and Mac-x64 use? What should Windows use?
-//
-// TODO: The previous SmallVector<ValueType> seems to outperform 4K chunks (they
-// may be slower by up to 5%) in benchmarks. Find ways to make chunking faster.
-// Should successive chunks get larger in size? That will greatly complicate
-// math for choosing a chunk though.
-template <class IdT>
-  requires(IdHasValueType<IdT>)
-static constexpr auto PlatformChunkMaxAllocationBytes() -> int32_t {
-#if !defined(NDEBUG) || LLVM_ADDRESS_SANITIZER_BUILD
-  // Use a small size in unoptimized builds to ensure multiple chunks get used.
-  // And do the same in ASAN builds to reduce bookkeeping overheads. Using large
-  // allocations (e.g. 1M+) incurs a 10x runtime cost for our tests under ASAN.
-  return sizeof(typename IdT::ValueType) * 5;
-#else
-  return 4 * 1024;
-#endif
-}
-
-// The number of elements stored in each chunk allocation.
-//
-// The number must be a power of two so that that there are no unused values in
-// bits indexing into the allocation.
-template <class IdT>
-  requires(IdHasValueType<IdT>)
-constexpr auto PlatformChunkCapacity() -> int32_t {
-  constexpr auto MaxElements =
-      PlatformChunkMaxAllocationBytes<IdT>() / sizeof(typename IdT::ValueType);
-  return std::bit_floor(MaxElements);
-}
-
-// The number of bits needed to index each element in a chunk allocation.
-template <class IdT>
-  requires(IdHasValueType<IdT>)
-constexpr auto PlatformChunkIndexBits() -> int32_t {
-  static_assert(PlatformChunkCapacity<IdT>() > 0);
-  return std::bit_width(uint32_t{PlatformChunkCapacity<IdT>() - 1});
-}
-
-// Converts an id into an index into the set of chunks, and an offset into that
-// specific chunk. Looks for index overflow in non-optimized builds.
-template <typename IdT>
-  requires(IdHasValueType<IdT>)
-inline auto IdToChunkIndices(IdT id) -> std::pair<int32_t, int32_t> {
-  constexpr auto LowBits = PlatformChunkIndexBits<IdT>();
-
-  // Verify there are no unused bits when indexing up to the
-  // PlatformChunkCapacity(). This ensures that ids are contiguous values
-  // from 0, as if the values were all stored in a single array, and allows
-  // using the ids to index into other arrays.
-  static_assert((1 << LowBits) == PlatformChunkCapacity<IdT>());
-  // Simple check to make sure nothing went wildly wrong with the
-  // PlatformChunkCapacity, and we have some room for a chunk index, and
-  // that shifting by the number of bits won't be UB in an int32_t.
-  static_assert(LowBits < 30);
-
-  // The index of the chunk is the high bits.
-  auto chunk = id.index >> LowBits;
-  // The index into the chunk is the low bits.
-  auto pos = id.index & ((1 << LowBits) - 1);
-  return {chunk, pos};
-}
-
-// A chunk of `ValueType`s which has a fixed capacity, but variable size. Tracks
-// the size internally for verifying bounds.
-template <typename IdT, class ValueType>
-struct ValueStoreChunk {
- public:
-  static constexpr auto Capacity = Internal::PlatformChunkCapacity<IdT>();
-  static constexpr auto CapacityBytes = Capacity * sizeof(ValueType);
-
-  explicit ValueStoreChunk()
-      : buf_(reinterpret_cast<ValueType*>(
-            llvm::allocate_buffer(CapacityBytes, alignof(ValueType)))) {}
-
-  // Moving leaves nullptr behind in the moved-from object so that the
-  // destructor is a no-op (preventing double free).
-  ValueStoreChunk(ValueStoreChunk&& rhs) noexcept
-      : buf_(std::exchange(rhs.buf_, nullptr)), num_(rhs.num_) {}
-
-  auto operator=(ValueStoreChunk&& rhs) noexcept -> ValueStoreChunk& {
-    buf_ = std::exchange(rhs.buf_, nullptr);
-    num_ = rhs.num_;
-    return *this;
-  }
-
-  ~ValueStoreChunk() {
-    if (buf_) {
-      if constexpr (!std::is_trivially_destructible_v<ValueType>) {
-        std::destroy_n(buf_, num_);
-      }
-      llvm::deallocate_buffer(buf_, CapacityBytes, alignof(ValueType));
-    }
-  }
-
-  auto at(int32_t i) -> ValueType& {
-    CARBON_CHECK(i < num_, "{0}", i);
-    return buf_[i];
-  }
-  auto at(int32_t i) const -> const ValueType& {
-    CARBON_CHECK(i < num_, "{0}", i);
-    return buf_[i];
-  }
-
-  auto push(ValueType&& value) -> void {
-    CARBON_CHECK(num_ < Capacity);
-    std::construct_at(buf_ + num_, std::move(value));
-    ++num_;
-  }
-
-  auto size() const -> int32_t { return num_; }
-
- private:
-  // Verify using an `int32_t` for `num_` is sound.
-  static_assert(Capacity <= std::numeric_limits<int32_t>::max());
-
-  ValueType* buf_;
-  int32_t num_ = 0;
-};
-
-}  // namespace Carbon::Internal
-
-#endif  // CARBON_TOOLCHAIN_BASE_VALUE_STORE_CHUNK_H_

+ 2 - 1
toolchain/sem_ir/generic.h

@@ -138,7 +138,8 @@ class SpecificStore : public Yaml::Printable<SpecificStore> {
   auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
       -> void;
 
-  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<SpecificId> {
+  auto values() const [[clang::lifetimebound]]
+  -> ValueStore<SpecificId>::Range {
     return specifics_.values();
   }
   auto size() const -> size_t { return specifics_.size(); }

+ 1 - 1
toolchain/sem_ir/impl.h

@@ -192,7 +192,7 @@ class ImplStore {
     mem_usage.Collect(MemUsage::ConcatLabel(label, "lookup_"), lookup_);
   }
 
-  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<ImplId> {
+  auto values() const [[clang::lifetimebound]] -> ValueStore<ImplId>::Range {
     return values_.values();
   }
   auto size() const -> size_t { return values_.size(); }

+ 1 - 1
toolchain/sem_ir/inst.h

@@ -592,7 +592,7 @@ class InstStore {
     mem_usage.Collect(MemUsage::ConcatLabel(label, "values_"), values_);
   }
 
-  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<InstId> {
+  auto values() const [[clang::lifetimebound]] -> ValueStore<InstId>::Range {
     return values_.values();
   }
   auto size() const -> int { return values_.size(); }