Преглед изворни кода

Make pointers in ValueStore stable across insertions (#5576)

This avoids reallocating the backing buffer in ValueStore so that
references into the ValueStore are never invalidated when adding new
values. This works especially well since we never delete values from a
ValueStore.

The strategy used is to allocate chunks of a fixed size, and inserting
into each chunk until it is full before allocating the next. The
ValueStore starts with an initial allocated chunk in all cases, so that
there is only a single indirection for adding and accessing values from
this chunk. After it's full, additional chunks are allocated in a
vector, so two indirections are required to add or access values in
these chunks.

This obviates the need for
https://github.com/carbon-language/carbon-lang/pull/5529 as we no longer
need to worry about holding pointers into a ValueStore.

We introduce a Flatten operation for ranges. It flattens a "range over
ranges over Ts" down to a "range over Ts". This allows us to make a
range over the values in the ValueStore from a range over the chunks in
the ValueStore. See
https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.flatten
for inspiration for this name choice. Flatten is used in one other case
where we were writing two levels of for loops to do the same thing.

The `array_ref()` accessor is changed to `values()` and it's now a range
(typed as a `ValueStoreRange`) over all values as references (like
ArrayRef was, but without random access).

As pointers to a ValueStore can no longer be invalidated, we remove the
ASAN poisoning feature and support from ValueStore.

This may cause a regression in our compile benchmark of up to 5%, though
that is close to or within the noise of the benchmark. We can look at
ways to optimize things further in the future. Perhaps by tuning the
chunk size further, or by making later chunks larger than earlier
chunks, or other strategies.
Dana Jansens пре 11 месеци
родитељ
комит
02fc484f23

+ 0 - 17
bazel/cc_toolchains/clang_cc_toolchain_config.bzl

@@ -643,22 +643,6 @@ def _impl(ctx):
         ],
     )
 
-    # A feature that enables poisoning of value stores to detect use after
-    # potential reallocation bugs.
-    #
-    # TODO: Remove this and leave poisoning always on once these bugs are
-    # fixed.
-    poison_value_stores = feature(
-        name = "poison_value_stores",
-        requires = [feature_set(["asan"])],
-        flag_sets = [flag_set(
-            actions = all_compile_actions,
-            flag_groups = [flag_group(flags = [
-                "-DCARBON_POISON_VALUE_STORES=1",
-            ])],
-        )],
-    )
-
     fuzzer = feature(
         name = "fuzzer",
         flag_sets = [flag_set(
@@ -1153,7 +1137,6 @@ def _impl(ctx):
         asan,
         asan_min_size,
         enable_in_fastbuild,
-        poison_value_stores,
         fuzzer,
         layering_check,
         module_maps,

+ 4 - 1
toolchain/base/BUILD

@@ -86,7 +86,10 @@ cc_library(
 
 cc_library(
     name = "value_store",
-    hdrs = ["value_store.h"],
+    hdrs = [
+        "value_store.h",
+        "value_store_chunk.h",
+    ],
     deps = [
         ":mem_usage",
         ":yaml",

+ 2 - 2
toolchain/base/int.h

@@ -346,8 +346,8 @@ class IntStore {
   // into the ID space.
   auto OutputYaml() const -> Yaml::OutputMapping;
 
-  auto array_ref() const -> llvm::ArrayRef<llvm::APInt> {
-    return values_.array_ref();
+  auto values() const [[clang::lifetimebound]] -> auto {
+    return values_.values();
   }
   auto size() const -> size_t { return values_.size(); }
 

+ 106 - 137
toolchain/base/value_store.h

@@ -7,6 +7,7 @@
 
 #include <memory>
 #include <type_traits>
+#include <utility>
 
 #include "common/check.h"
 #include "common/hashtable_key_context.h"
@@ -15,9 +16,11 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Compiler.h"
 #include "toolchain/base/mem_usage.h"
+#include "toolchain/base/value_store_chunk.h"
 #include "toolchain/base/yaml.h"
 
 namespace Carbon {
@@ -29,29 +32,22 @@ namespace Internal {
 class ValueStoreNotPrintable {};
 }  // namespace Internal
 
-// Setup our compile time condition controlling poisoning of value stores. This
-// is set to one by the Bazel flag `--features=poison_value_stores`.
-//
-// TODO: Eventually, this will always enabled when ASan is enabled, but we can't
-// do that until we clean up all of the latent bugs.
-#ifndef CARBON_POISON_VALUE_STORES
-#define CARBON_POISON_VALUE_STORES 0
-#elif !LLVM_ADDRESS_SANITIZER_BUILD
-#error "CARBON_POISON_VALUE_STORES requires address sanitizer"
-#endif
+template <class IdT>
+class ValueStoreRange;
 
 // A simple wrapper for accumulating values, providing IDs to later retrieve the
 // value. This does not do deduplication.
 //
 // IdT::ValueType must represent the type being indexed.
 template <typename IdT>
+  requires(Internal::IdHasValueType<IdT>)
 class ValueStore
     : public std::conditional<
           std::is_base_of_v<Printable<typename IdT::ValueType>,
                             typename IdT::ValueType>,
           Yaml::Printable<ValueStore<IdT>>, Internal::ValueStoreNotPrintable> {
  public:
-  using ValueType = typename IdT::ValueType;
+  using ValueType = std::decay_t<typename IdT::ValueType>;
 
   // Typically we want to use `ValueType&` and `const ValueType& to avoid
   // copies, but when the value type is a `StringRef`, we assume external
@@ -64,83 +60,61 @@ class ValueStore
                          llvm::StringRef, const ValueType&>;
 
   ValueStore() = default;
-  ValueStore(ValueStore&& other) noexcept
-      : values_((other.UnpoisonAll(), std::move(other.values_)))
-#if CARBON_POISON_VALUE_STORES
-        ,
-        all_poisoned_(false)
-#endif
-  {
-    PoisonAll();
-  }
-  auto operator=(ValueStore&& other) noexcept -> ValueStore& {
-    UnpoisonAll();
-    other.UnpoisonAll();
-    values_ = std::move(other.values_);
-#if CARBON_POISON_VALUE_STORES
-    all_poisoned_ = false;
-#endif
-    PoisonAll();
-    return *this;
-  }
-  ~ValueStore() { UnpoisonAll(); }
 
   // Stores the value and returns an ID to reference it.
   auto Add(ValueType value) -> IdT {
-    IdT id(values_.size());
     // This routine is especially hot and the check here relatively expensive
-    // for the value provided, so only do this in debug builds to make tracking
-    // down issues easier.
-    CARBON_DCHECK(id.index >= 0, "Id overflow");
-
-    bool realloc = values_.capacity() == values_.size();
-    if (realloc) {
-      // Unpoison everything if the push will reallocate, in order to allow the
-      // vector to make a copy of the elements.
-      UnpoisonAll();
-    } else {
-      PoisonAll();
-    }
-
-    values_.push_back(std::move(value));
-
-    if (realloc) {
-      PoisonAll();
-    } else {
-      PoisonElement(id.index);
+    // for the value provided, so only do this in non-optimized builds to make
+    // tracking down issues easier.
+    CARBON_DCHECK(size_ < std::numeric_limits<int32_t>::max(), "Id overflow");
+
+    IdT id(size_);
+    auto [chunk_index, pos] = Internal::IdToChunkIndices(id);
+    ++size_;
+
+    CARBON_DCHECK(static_cast<size_t>(chunk_index) <= chunks_.size(),
+                  "{0} <= {1}", chunk_index, chunks_.size());
+    if (static_cast<size_t>(chunk_index) == chunks_.size()) {
+      chunks_.emplace_back();
     }
 
+    CARBON_DCHECK(pos == chunks_[chunk_index].size());
+    chunks_[chunk_index].push(std::move(value));
     return id;
   }
 
   // Returns a mutable value for an ID.
   auto Get(IdT id) -> RefType {
     CARBON_DCHECK(id.index >= 0, "{0}", id);
-    UnpoisonElement(id.index);
-    return values_[id.index];
+    CARBON_DCHECK(id.index < size_, "{0}", id);
+    auto [chunk_index, pos] = Internal::IdToChunkIndices(id);
+    return chunks_[chunk_index].at(pos);
   }
 
   // Returns the value for an ID.
   auto Get(IdT id) const -> ConstRefType {
     CARBON_DCHECK(id.index >= 0, "{0}", id);
-    UnpoisonElement(id.index);
-    return values_[id.index];
+    CARBON_DCHECK(id.index < size_, "{0}", id);
+    auto [chunk_index, pos] = Internal::IdToChunkIndices(id);
+    return chunks_[chunk_index].at(pos);
   }
 
   // Reserves space.
   auto Reserve(size_t size) -> void {
-    UnpoisonAll();
-    values_.reserve(size);
-    PoisonAll();
+    // We get the number of chunks needed to satisfy `size` by rounding any
+    // partial result up.
+    size_t num_more_chunks =
+        (size + ChunkType::Capacity - 1) / ChunkType::Capacity;
+    if (chunks_.size() < num_more_chunks) {
+      // We resize() rather than reserve() here to create the new `ChunkType`
+      // objects, which will in turn allocate space for values in those chunks
+      // (but not initialize them).
+      chunks_.resize(num_more_chunks);
+    }
   }
 
-  // Invalidates all current pointers and references into the value store. Used
-  // in debug builds to trigger use-after-invalidation bugs.
-  auto Invalidate() -> void { PoisonAll(); }
-
   // These are to support printable structures, and are not guaranteed.
   auto OutputYaml() const -> Yaml::OutputMapping {
-    UnpoisonAll();
     return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
       for (auto [id, value] : enumerate()) {
         map.Add(PrintToString(id), Yaml::OutputScalar(value));
@@ -151,15 +125,16 @@ class ValueStore
   // Collects memory usage of the values.
   auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
       -> void {
-    UnpoisonAll();
-    mem_usage.Collect(label.str(), values_);
+    mem_usage.Add(label.str(), size_ * sizeof(ValueType),
+                  ChunkType::CapacityBytes * chunks_.size());
   }
 
-  auto array_ref() const -> llvm::ArrayRef<ValueType> {
-    UnpoisonAll();
-    return values_;
+  auto size() const -> size_t { return size_; }
+
+  // Makes an iterable range over references to all values in the ValueStore.
+  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<IdT> {
+    return ValueStoreRange<IdT>(*this);
   }
-  auto size() const -> size_t { return values_.size(); }
 
   // Makes an iterable range over pairs of the index and a reference to the
   // value for each value in the store.
@@ -170,63 +145,63 @@ class ValueStore
   // ```
   // for (auto [id, value] : store.enumerate()) { ... }
   // ```
-  auto enumerate() const -> auto {
-    UnpoisonAll();
-    auto index_to_id = [](auto pair) -> std::pair<IdT, ConstRefType> {
-      auto [index, value] = pair;
-      return std::pair<IdT, ConstRefType>(IdT(index), value);
+  auto enumerate() const [[clang::lifetimebound]] -> auto {
+    auto index_to_id = [&](int32_t i) -> std::pair<IdT, ConstRefType> {
+      return std::pair<IdT, ConstRefType>(IdT(i), Get(IdT(i)));
     };
-    return llvm::map_range(llvm::enumerate(values_), index_to_id);
+    // Because indices into `ValueStore` are all sequential values from 0, we
+    // can use llvm::seq to walk all indices in the store.
+    return llvm::map_range(llvm::seq(size_), index_to_id);
   }
 
  private:
-  // Poison the entire contents of the value store. This is used to detect cases
-  // where references to elements in a value store are used across calls that
-  // might modify the store.
-  auto PoisonAll() const -> void {
-#if CARBON_POISON_VALUE_STORES
-    if (!all_poisoned_) {
-      __asan_poison_memory_region(values_.data(),
-                                  values_.size() * sizeof(values_[0]));
-      all_poisoned_ = true;
-    }
-#endif
-  }
-  // Unpoison the entire contents of the value store.
-  auto UnpoisonAll() const -> void {
-#if CARBON_POISON_VALUE_STORES
-    __asan_unpoison_memory_region(values_.data(),
-                                  values_.size() * sizeof(values_[0]));
-    all_poisoned_ = false;
-#endif
-  }
-  // Poison a single element.
-  auto PoisonElement([[maybe_unused]] int element) const -> void {
-#if CARBON_POISON_VALUE_STORES
-    __asan_unpoison_memory_region(values_.data() + element, sizeof(values_[0]));
-#endif
-  }
-  // Unpoison a single element.
-  auto UnpoisonElement([[maybe_unused]] int element) const -> void {
-#if CARBON_POISON_VALUE_STORES
-    __asan_unpoison_memory_region(values_.data() + element, sizeof(values_[0]));
-    all_poisoned_ = false;
-#endif
-  }
+  friend class ValueStoreRange<IdT>;
+
+  using ChunkType = Internal::ValueStoreChunk<IdT, ValueType>;
+
+  // Number of elements added to the store. The number should never exceed what
+  // fits in an `int32_t`, which is checked in non-optimized builds in Add().
+  int32_t size_ = 0;
+
+  // Storage for the `ValueType` objects, indexed by the id. We use a vector of
+  // chunks of `ValueType` instead of just a vector of `ValueType` so that
+  // addresses of `ValueType` objects are stable. This allows the rest of the
+  // toolchain to hold references into `ValueStore` without having to worry
+  // about invalidation and use-after-free. We ensure at least one Chunk is held
+  // inline so that in the case where there is only a single Chunk (i.e. small
+  // files) we can avoid one indirection.
+  llvm::SmallVector<ChunkType, 1> chunks_;
+};
 
-  // Set inline size to 0 because these will typically be too large for the
-  // stack, while this does make File smaller.
-  llvm::SmallVector<std::decay_t<ValueType>, 0> values_;
+// A range over references to the values in a ValueStore, returned from
+// `ValueStore::values()`. Hides the complex type name of the iterator
+// internally to provide a type name (`ValueStoreRange<IdT>`) that can be
+// referred to without auto and templates.
+template <class IdT>
+class ValueStoreRange {
+ public:
+  explicit ValueStoreRange(const ValueStore<IdT>& store
+                           [[clang::lifetimebound]])
+      : flattened_range_(MakeFlattenedRange(store)) {}
 
-#if CARBON_POISON_VALUE_STORES
-  // Whether the vector is currently fully poisoned.
-  //
-  // We use this to avoid repeated re-poisoning of the entire store. Doing so is
-  // linear in the size of the store, and we trigger re-poisoning frequently,
-  // for example on each import. Tracking that here allows us to coalesce these
-  // into a single linear operation.
-  mutable bool all_poisoned_ = true;
-#endif
+  auto begin() const -> auto { return flattened_range_.begin(); }
+  auto end() const -> auto { return flattened_range_.end(); }
+
+ private:
+  // Flattens the range of `ValueStoreChunk`s of `ValueType`s into a single
+  // range of `ValueType`s.
+  static auto MakeFlattenedRange(const ValueStore<IdT>& store) -> auto {
+    // Because indices into `ValueStore` are all sequential values from 0, we
+    // can use llvm::seq to walk all indices in the store.
+    return llvm::map_range(llvm::seq(store.size_),
+                           [&](int32_t i) -> ValueStore<IdT>::ConstRefType {
+                             return store.Get(IdT(i));
+                           });
+  }
+
+  using FlattenedRangeType =
+      decltype(MakeFlattenedRange(std::declval<const ValueStore<IdT>&>()));
+  FlattenedRangeType flattened_range_;
 };
 
 // A wrapper for accumulating immutable values with deduplication, providing IDs
@@ -253,17 +228,13 @@ class CanonicalValueStore {
   // Reserves space.
   auto Reserve(size_t size) -> void;
 
-  // Invalidates all current pointers and references into the value store. Used
-  // in debug builds to trigger use-after-invalidation bugs.
-  auto Invalidate() -> void { values_.Invalidate(); }
-
   // These are to support printable structures, and are not guaranteed.
   auto OutputYaml() const -> Yaml::OutputMapping {
     return values_.OutputYaml();
   }
 
-  auto array_ref() const -> llvm::ArrayRef<ValueType> {
-    return values_.array_ref();
+  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<IdT> {
+    return values_.values();
   }
   auto size() const -> size_t { return values_.size(); }
 
@@ -271,8 +242,7 @@ class CanonicalValueStore {
   auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
       -> void {
     mem_usage.Collect(MemUsage::ConcatLabel(label, "values_"), values_);
-    auto bytes =
-        set_.ComputeMetrics(KeyContext(values_.array_ref())).storage_bytes;
+    auto bytes = set_.ComputeMetrics(KeyContext(&values_)).storage_bytes;
     mem_usage.Add(MemUsage::ConcatLabel(label, "set_"), bytes, bytes);
   }
 
@@ -287,27 +257,27 @@ template <typename IdT>
 class CanonicalValueStore<IdT>::KeyContext
     : public TranslatingKeyContext<KeyContext> {
  public:
-  explicit KeyContext(llvm::ArrayRef<ValueType> values) : values_(values) {}
+  explicit KeyContext(const ValueStore<IdT>* values) : values_(values) {}
 
   // Note that it is safe to return a `const` reference here as the underlying
-  // object's lifetime is provided by the `store_`.
-  auto TranslateKey(IdT id) const -> const ValueType& {
-    return values_[id.index];
+  // object's lifetime is provided by the `ValueStore`.
+  auto TranslateKey(IdT id) const -> ValueStore<IdT>::ConstRefType {
+    return values_->Get(id);
   }
 
  private:
-  llvm::ArrayRef<ValueType> values_;
+  const ValueStore<IdT>* values_;
 };
 
 template <typename IdT>
 auto CanonicalValueStore<IdT>::Add(ValueType value) -> IdT {
   auto make_key = [&] { return IdT(values_.Add(std::move(value))); };
-  return set_.Insert(value, make_key, KeyContext(values_.array_ref())).key();
+  return set_.Insert(value, make_key, KeyContext(&values_)).key();
 }
 
 template <typename IdT>
 auto CanonicalValueStore<IdT>::Lookup(ValueType value) const -> IdT {
-  if (auto result = set_.Lookup(value, KeyContext(values_.array_ref()))) {
+  if (auto result = set_.Lookup(value, KeyContext(&values_))) {
     return result.key();
   }
   return IdT::None;
@@ -318,8 +288,7 @@ auto CanonicalValueStore<IdT>::Reserve(size_t size) -> void {
   // Compute the resulting new insert count using the size of values -- the
   // set doesn't have a fast to compute current size.
   if (size > values_.size()) {
-    set_.GrowForInsertCount(size - values_.size(),
-                            KeyContext(values_.array_ref()));
+    set_.GrowForInsertCount(size - values_.size(), KeyContext(&values_));
   }
   values_.Reserve(size);
 }

+ 164 - 0
toolchain/base/value_store_chunk.h

@@ -0,0 +1,164 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_CHUNK_H_
+#define CARBON_TOOLCHAIN_BASE_VALUE_STORE_CHUNK_H_
+
+#include <bit>
+#include <cstddef>
+#include <limits>
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+#include "common/check.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemAlloc.h"
+#include "toolchain/base/mem_usage.h"
+
+namespace Carbon::Internal {
+
+// Ids which are stored in a `ValueStore` have a `ValueType` which indicates the
+// type of value held in the `ValueStore`.
+template <class IdT>
+concept IdHasValueType = requires { typename IdT::ValueType; };
+
+// The max size of each chunk allocation for `ValueStore`. This is based on TLB
+// page sizes for the target platform.
+//
+// See https://docs.kernel.org/admin-guide/mm/hugetlbpage.html
+//
+// A 4K chunk size outperforms a 1M chunk size on Linux-x64 and MacOS-arm64 in
+// benchmarks and when running file_test.
+//
+// Linux-x64: x64 CPUs support 4K and 2M page sizes, but we see 1M is slower
+// than 4K with tcmalloc in opt builds for our tests.
+//
+// Mac-arm64: arm64 CPUs support 4K, 8K, 64K, 256K, 1M, 4M and up. Like for
+// Linux-x64, 4K outperformed 1M. We didn't try other sizes yet.
+//
+// TODO: Is there a more optimal size for Mac-arm64? What should Linux-arm64
+// and Mac-x64 use? What should Windows use?
+//
+// TODO: The previous SmallVector<ValueType> seems to outperform 4K chunks (they
+// may be slower by up to 5%) in benchmarks. Find ways to make chunking faster.
+// Should successive chunks get larger in size? That will greatly complicate
+// math for choosing a chunk though.
+template <class IdT>
+  requires(IdHasValueType<IdT>)
+static constexpr auto PlatformChunkMaxAllocationBytes() -> int32_t {
+#if !defined(NDEBUG) || LLVM_ADDRESS_SANITIZER_BUILD
+  // Use a small size in unoptimized builds to ensure multiple chunks get used.
+  // And do the same in ASAN builds to reduce bookkeeping overheads. Using large
+  // allocations (e.g. 1M+) incurs a 10x runtime cost for our tests under ASAN.
+  return sizeof(typename IdT::ValueType) * 5;
+#else
+  return 4 * 1024;
+#endif
+}
+
+// The number of elements stored in each chunk allocation.
+//
+// The number must be a power of two so that there are no unused values in
+// bits indexing into the allocation.
+template <class IdT>
+  requires(IdHasValueType<IdT>)
+constexpr auto PlatformChunkCapacity() -> int32_t {
+  constexpr auto MaxElements =
+      PlatformChunkMaxAllocationBytes<IdT>() / sizeof(typename IdT::ValueType);
+  return std::bit_floor(MaxElements);
+}
+
+// The number of bits needed to index each element in a chunk allocation.
+template <class IdT>
+  requires(IdHasValueType<IdT>)
+constexpr auto PlatformChunkIndexBits() -> int32_t {
+  static_assert(PlatformChunkCapacity<IdT>() > 0);
+  return std::bit_width(uint32_t{PlatformChunkCapacity<IdT>() - 1});
+}
+
+// Converts an id into an index into the set of chunks, and an offset into that
+// specific chunk. Looks for index overflow in non-optimized builds.
+template <typename IdT>
+  requires(IdHasValueType<IdT>)
+inline auto IdToChunkIndices(IdT id) -> std::pair<int32_t, int32_t> {
+  constexpr auto LowBits = PlatformChunkIndexBits<IdT>();
+
+  // Verify there are no unused bits when indexing up to the
+  // PlatformChunkCapacity(). This ensures that ids are contiguous values
+  // from 0, as if the values were all stored in a single array, and allows
+  // using the ids to index into other arrays.
+  static_assert((1 << LowBits) == PlatformChunkCapacity<IdT>());
+  // Simple check to make sure nothing went wildly wrong with the
+  // PlatformChunkCapacity, and we have some room for a chunk index, and
+  // that shifting by the number of bits won't be UB in an int32_t.
+  static_assert(LowBits < 30);
+
+  // The index of the chunk is the high bits.
+  auto chunk = id.index >> LowBits;
+  // The index into the chunk is the low bits.
+  auto pos = id.index & ((1 << LowBits) - 1);
+  return {chunk, pos};
+}
+
+// A chunk of `ValueType`s which has a fixed capacity, but variable size. Tracks
+// the size internally for verifying bounds.
+template <typename IdT, class ValueType>
+struct ValueStoreChunk {
+ public:
+  static constexpr auto Capacity = Internal::PlatformChunkCapacity<IdT>();
+  static constexpr auto CapacityBytes = Capacity * sizeof(ValueType);
+
+  explicit ValueStoreChunk()
+      : buf_(reinterpret_cast<ValueType*>(
+            llvm::allocate_buffer(CapacityBytes, alignof(ValueType)))) {}
+
+  // Moving leaves nullptr behind in the moved-from object so that the
+  // destructor is a no-op (preventing double free).
+  ValueStoreChunk(ValueStoreChunk&& rhs) noexcept
+      : buf_(std::exchange(rhs.buf_, nullptr)), num_(rhs.num_) {}
+
+  auto operator=(ValueStoreChunk&& rhs) noexcept -> ValueStoreChunk& {
+    buf_ = std::exchange(rhs.buf_, nullptr);
+    num_ = rhs.num_;
+    return *this;
+  }
+
+  ~ValueStoreChunk() {
+    if (buf_) {
+      if constexpr (!std::is_trivially_destructible_v<ValueType>) {
+        std::destroy_n(buf_, num_);
+      }
+      llvm::deallocate_buffer(buf_, CapacityBytes, alignof(ValueType));
+    }
+  }
+
+  auto at(int32_t i) -> ValueType& {
+    CARBON_CHECK(i < num_, "{0}", i);
+    return buf_[i];
+  }
+  auto at(int32_t i) const -> const ValueType& {
+    CARBON_CHECK(i < num_, "{0}", i);
+    return buf_[i];
+  }
+
+  auto push(ValueType&& value) -> void {
+    CARBON_CHECK(num_ < Capacity);
+    std::construct_at(buf_ + num_, std::move(value));
+    ++num_;
+  }
+
+  auto size() const -> int32_t { return num_; }
+
+ private:
+  // Verify using an `int32_t` for `num_` is sound.
+  static_assert(Capacity <= std::numeric_limits<int32_t>::max());
+
+  ValueType* buf_;
+  int32_t num_ = 0;
+};
+
+}  // namespace Carbon::Internal
+
+#endif  // CARBON_TOOLCHAIN_BASE_VALUE_STORE_CHUNK_H_

+ 2 - 2
toolchain/check/check_unit.cpp

@@ -213,7 +213,7 @@ auto CheckUnit::CollectTransitiveImports(SemIR::InstId import_decl_id,
     bool is_export = results[direct_index].is_export;
 
     for (const auto& indirect_ir :
-         results[direct_index].sem_ir->import_irs().array_ref()) {
+         results[direct_index].sem_ir->import_irs().values()) {
       if (!indirect_ir.is_export) {
         continue;
       }
@@ -416,7 +416,7 @@ auto CheckUnit::ProcessNodeIds() -> bool {
 }
 
 auto CheckUnit::CheckRequiredDeclarations() -> void {
-  for (const auto& function : context_.functions().array_ref()) {
+  for (const auto& function : context_.functions().values()) {
     if (!function.first_owning_decl_id.has_value() &&
         function.extern_library_id == context_.sem_ir().library_id()) {
       auto function_import_id =

+ 1 - 1
toolchain/check/handle_index.cpp

@@ -55,7 +55,7 @@ static auto GetIndexWithArgs(Context& context, Parse::NodeId node_id,
     return std::nullopt;
   }
 
-  for (const auto& impl : context.impls().array_ref()) {
+  for (const auto& impl : context.impls().values()) {
     auto impl_self_type_id =
         context.types().GetTypeIdForTypeInstId(impl.self_id);
     auto impl_constraint_type_id =

+ 1 - 1
toolchain/check/impl_validation.cpp

@@ -367,7 +367,7 @@ static auto ImportFinalImplsWithImplInFile(Context& context) -> void {
   };
 
   llvm::SmallVector<InterfaceToImport> interfaces_to_import;
-  for (const auto& impl : context.impls().array_ref()) {
+  for (const auto& impl : context.impls().values()) {
     if (impl.witness_id == SemIR::ErrorInst::InstId) {
       continue;
     }

+ 0 - 14
toolchain/check/import_ref.cpp

@@ -3324,20 +3324,6 @@ static auto GetInstForLoad(Context& context,
 
 // NOLINTNEXTLINE(misc-no-recursion)
 auto LoadImportRef(Context& context, SemIR::InstId inst_id) -> void {
-#if LLVM_ADDRESS_SANITIZER_BUILD
-  // Under ASan, invalidate all of our value stores on any import in order to
-  // flush out bugs where pointers and references to entities are held across
-  // imports.
-  context.classes().Invalidate();
-  context.entity_names().Invalidate();
-  context.facet_types().Invalidate();
-  context.functions().Invalidate();
-  context.generics().Invalidate();
-  context.impls().Invalidate();
-  context.interfaces().Invalidate();
-  context.specifics().Invalidate();
-#endif
-
   auto inst = context.insts().TryGetAs<SemIR::ImportRefUnloaded>(inst_id);
   if (!inst) {
     return;

+ 1 - 1
toolchain/lower/file_context.cpp

@@ -90,7 +90,7 @@ auto FileContext::PrepareToLower() -> void {
 
 // TODO: Move this to lower.cpp.
 auto FileContext::LowerDefinitions() -> void {
-  for (const auto& class_info : sem_ir_->classes().array_ref()) {
+  for (const auto& class_info : sem_ir_->classes().values()) {
     if (auto* llvm_vtable = BuildVtable(class_info)) {
       global_variables_.Insert(class_info.vtable_id, llvm_vtable);
     }

+ 1 - 1
toolchain/sem_ir/file.cpp

@@ -62,7 +62,7 @@ auto File::Verify() const -> ErrorOr<Success> {
 
   // Check that every code block has a terminator sequence that appears at the
   // end of the block.
-  for (const Function& function : functions_.array_ref()) {
+  for (const Function& function : functions_.values()) {
     for (InstBlockId block_id : function.body_block_ids) {
       TerminatorKind prior_kind = TerminatorKind::NotTerminator;
       for (InstId inst_id : inst_blocks().Get(block_id)) {

+ 1 - 1
toolchain/sem_ir/formatter.cpp

@@ -1158,7 +1158,7 @@ auto Formatter::FormatCallRhs(Call inst) -> void {
 auto Formatter::FormatImportCppDeclRhs() -> void {
   out_ << " ";
   OpenBrace();
-  for (ImportCpp import_cpp : sem_ir_->import_cpps().array_ref()) {
+  for (ImportCpp import_cpp : sem_ir_->import_cpps().values()) {
     Indent();
     out_ << "import Cpp \""
          << FormatEscaped(

+ 5 - 5
toolchain/sem_ir/generic.cpp

@@ -19,16 +19,16 @@ class SpecificStore::KeyContext : public TranslatingKeyContext<KeyContext> {
     friend auto operator==(const Key&, const Key&) -> bool = default;
   };
 
-  explicit KeyContext(llvm::ArrayRef<Specific> specifics)
+  explicit KeyContext(const ValueStore<SpecificId>* specifics)
       : specifics_(specifics) {}
 
   auto TranslateKey(SpecificId id) const -> Key {
-    const auto& specific = specifics_[id.index];
+    const auto& specific = specifics_->Get(id);
     return {.generic_id = specific.generic_id, .args_id = specific.args_id};
   }
 
  private:
-  llvm::ArrayRef<Specific> specifics_;
+  const ValueStore<SpecificId>* specifics_;
 };
 
 auto SpecificStore::GetOrAdd(GenericId generic_id, InstBlockId args_id)
@@ -41,7 +41,7 @@ auto SpecificStore::GetOrAdd(GenericId generic_id, InstBlockId args_id)
             return specifics_.Add(
                 {.generic_id = generic_id, .args_id = args_id});
           },
-          KeyContext(specifics_.array_ref()))
+          KeyContext(&specifics_))
       .key();
 }
 
@@ -49,7 +49,7 @@ auto SpecificStore::CollectMemUsage(MemUsage& mem_usage,
                                     llvm::StringRef label) const -> void {
   mem_usage.Collect(MemUsage::ConcatLabel(label, "specifics_"), specifics_);
   mem_usage.Collect(MemUsage::ConcatLabel(label, "lookup_table_"),
-                    lookup_table_, KeyContext(specifics_.array_ref()));
+                    lookup_table_, KeyContext(&specifics_));
 }
 
 static auto GetConstantInSpecific(const File& sem_ir, SpecificId specific_id,

+ 5 - 7
toolchain/sem_ir/generic.h

@@ -127,10 +127,6 @@ class SpecificStore : public Yaml::Printable<SpecificStore> {
                                    : InstBlockId::Empty;
   }
 
-  // Invalidates all current pointers and references into the value store. Used
-  // in debug builds to trigger use-after-invalidation bugs.
-  auto Invalidate() -> void { specifics_.Invalidate(); }
-
   // These are to support printable structures, and are not guaranteed.
   auto OutputYaml() const -> Yaml::OutputMapping {
     return specifics_.OutputYaml();
@@ -140,11 +136,13 @@ class SpecificStore : public Yaml::Printable<SpecificStore> {
   auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
       -> void;
 
-  auto array_ref() const -> llvm::ArrayRef<Specific> {
-    return specifics_.array_ref();
+  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<SpecificId> {
+    return specifics_.values();
   }
   auto size() const -> size_t { return specifics_.size(); }
-  auto enumerate() const -> auto { return specifics_.enumerate(); }
+  auto enumerate() const [[clang::lifetimebound]] -> auto {
+    return specifics_.enumerate();
+  }
 
  private:
   // Context for hashing keys.

+ 6 - 6
toolchain/sem_ir/impl.h

@@ -181,10 +181,6 @@ class ImplStore {
   // Returns the value for an ID.
   auto Get(ImplId id) const -> const Impl& { return values_.Get(id); }
 
-  // Invalidates all current pointers and references into the value store. Used
-  // in debug builds to trigger use-after-invalidation bugs.
-  auto Invalidate() -> void { values_.Invalidate(); }
-
   auto OutputYaml() const -> Yaml::OutputMapping {
     return values_.OutputYaml();
   }
@@ -196,9 +192,13 @@ class ImplStore {
     mem_usage.Collect(MemUsage::ConcatLabel(label, "lookup_"), lookup_);
   }
 
-  auto array_ref() const -> llvm::ArrayRef<Impl> { return values_.array_ref(); }
+  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<ImplId> {
+    return values_.values();
+  }
   auto size() const -> size_t { return values_.size(); }
-  auto enumerate() const -> auto { return values_.enumerate(); }
+  auto enumerate() const [[clang::lifetimebound]] -> auto {
+    return values_.enumerate();
+  }
 
  private:
   File& sem_ir_;

+ 6 - 2
toolchain/sem_ir/inst.h

@@ -530,9 +530,13 @@ class InstStore {
     mem_usage.Collect(MemUsage::ConcatLabel(label, "values_"), values_);
   }
 
-  auto array_ref() const -> llvm::ArrayRef<Inst> { return values_.array_ref(); }
+  auto values() const [[clang::lifetimebound]] -> ValueStoreRange<InstId> {
+    return values_.values();
+  }
   auto size() const -> int { return values_.size(); }
-  auto enumerate() const -> auto { return values_.enumerate(); }
+  auto enumerate() const [[clang::lifetimebound]] -> auto {
+    return values_.enumerate();
+  }
 
  private:
   // Given a symbolic type, get the corresponding unattached type.