ソースを参照

Add a fixed-size ValueStore (#5628)

Trying to build a type around the common idiom we have for types based
on an Id range. The primary advantage of this is it makes clear the `Id`
association, and drops the `.index` use.

Lowering was motivating me because it has a few of these, and check
probably has more (e.g. `tree_and_subtrees_getters`), but I'm just
changing a handful of examples to show the concept and see if there's
agreement.

I wanted to inherit from ValueStoreTypes, but name lookup didn't seem to
find the types without `using` statements, at which point there didn't
seem to be much reason to use inheritance.
Jon Ross-Perkins 11 ヶ月 前
コミット
2e297b5258

+ 12 - 0
toolchain/base/BUILD

@@ -84,6 +84,18 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "fixed_size_value_store",
+    hdrs = ["fixed_size_value_store.h"],
+    deps = [
+        ":mem_usage",
+        ":value_store",
+        "//common:check",
+        "//common:move_only",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
 cc_library(
     name = "value_store",
     hdrs = [

+ 74 - 0
toolchain/base/fixed_size_value_store.h

@@ -0,0 +1,74 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_BASE_FIXED_SIZE_VALUE_STORE_H_
+#define CARBON_TOOLCHAIN_BASE_FIXED_SIZE_VALUE_STORE_H_
+
+#include "common/check.h"
+#include "common/move_only.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "toolchain/base/mem_usage.h"
+#include "toolchain/base/value_store.h"
+
+namespace Carbon {
+
+// A value store with a predetermined size.
+template <typename IdT, typename ValueT>
+class FixedSizeValueStore : public MoveOnly<FixedSizeValueStore<IdT, ValueT>> {
+ public:
+  using ValueType = ValueStoreTypes<IdT, ValueT>::ValueType;
+  using RefType = ValueStoreTypes<IdT, ValueT>::RefType;
+  using ConstRefType = ValueStoreTypes<IdT, ValueT>::ConstRefType;
+
+  // Makes a ValueStore of the specified size, but without initializing values.
+  // Entries must be set before reading.
+  static auto MakeForOverwrite(size_t size) -> FixedSizeValueStore {
+    FixedSizeValueStore store;
+    store.values_.resize_for_overwrite(size);
+    return store;
+  }
+
+  // Makes a ValueStore of the specified size, initialized to a default.
+  explicit FixedSizeValueStore(size_t size, ValueT default_value) {
+    values_.resize(size, default_value);
+  }
+
+  // Sets the value for an ID.
+  auto Set(IdT id, ValueType value) -> void {
+    CARBON_DCHECK(id.index >= 0, "{0}", id);
+    values_[id.index] = value;
+  }
+
+  // Returns a mutable value for an ID.
+  auto Get(IdT id) -> RefType {
+    CARBON_DCHECK(id.index >= 0, "{0}", id);
+    return values_[id.index];
+  }
+
+  // Returns the value for an ID.
+  auto Get(IdT id) const -> ConstRefType {
+    CARBON_DCHECK(id.index >= 0, "{0}", id);
+    return values_[id.index];
+  }
+
+  // Collects memory usage of the values.
+  auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
+      -> void {
+    mem_usage.Collect(label.str(), values_);
+  }
+
+  auto size() const -> size_t { return values_.size(); }
+
+ private:
+  // Allow default construction for `MakeForOverwrite`.
+  FixedSizeValueStore() = default;
+
+  // Storage for the `ValueT` objects, indexed by the id.
+  llvm::SmallVector<ValueT, 0> values_;
+};
+
+}  // namespace Carbon
+
+#endif  // CARBON_TOOLCHAIN_BASE_FIXED_SIZE_VALUE_STORE_H_

+ 26 - 16
toolchain/base/value_store.h

@@ -30,11 +30,29 @@ namespace Internal {
 // Used as a parent class for non-printable types. This is just for
 // std::conditional, not as an API.
 class ValueStoreNotPrintable {};
+
 }  // namespace Internal
 
 template <class IdT>
 class ValueStoreRange;
 
+// Common calculation for ValueStore types.
+template <typename IdT, typename ValueT = IdT::ValueType>
+class ValueStoreTypes {
+ public:
+  using ValueType = std::decay_t<ValueT>;
+
+  // Typically we want to use `ValueType&` and `const ValueType& to avoid
+  // copies, but when the value type is a `StringRef`, we assume external
+  // storage for the string data and both our value type and ref type will be
+  // `StringRef`. This will preclude mutation of the string data.
+  using RefType = std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
+                                     llvm::StringRef, ValueType&>;
+  using ConstRefType =
+      std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
+                         llvm::StringRef, const ValueType&>;
+};
+
 // A simple wrapper for accumulating values, providing IDs to later retrieve the
 // value. This does not do deduplication.
 //
@@ -47,17 +65,9 @@ class ValueStore
                             typename IdT::ValueType>,
           Yaml::Printable<ValueStore<IdT>>, Internal::ValueStoreNotPrintable> {
  public:
-  using ValueType = std::decay_t<typename IdT::ValueType>;
-
-  // Typically we want to use `ValueType&` and `const ValueType& to avoid
-  // copies, but when the value type is a `StringRef`, we assume external
-  // storage for the string data and both our value type and ref type will be
-  // `StringRef`. This will preclude mutation of the string data.
-  using RefType = std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
-                                     llvm::StringRef, ValueType&>;
-  using ConstRefType =
-      std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
-                         llvm::StringRef, const ValueType&>;
+  using ValueType = ValueStoreTypes<IdT>::ValueType;
+  using RefType = ValueStoreTypes<IdT>::RefType;
+  using ConstRefType = ValueStoreTypes<IdT>::ConstRefType;
 
   ValueStore() = default;
 
@@ -211,9 +221,9 @@ class ValueStoreRange {
 template <typename IdT>
 class CanonicalValueStore {
  public:
-  using ValueType = typename IdT::ValueType;
-  using RefType = typename ValueStore<IdT>::RefType;
-  using ConstRefType = typename ValueStore<IdT>::ConstRefType;
+  using ValueType = ValueStoreTypes<IdT>::ValueType;
+  using RefType = ValueStoreTypes<IdT>::RefType;
+  using ConstRefType = ValueStoreTypes<IdT>::ConstRefType;
 
   // Stores a canonical copy of the value and returns an ID to reference it.
   auto Add(ValueType value) -> IdT;
@@ -313,8 +323,8 @@ auto CanonicalValueStore<IdT>::Reserve(size_t size) -> void {
 template <typename RelatedIdT, typename IdT>
 class RelationalValueStore {
  public:
-  using ValueType = IdT::ValueType;
-  using ConstRefType = ValueStore<IdT>::ConstRefType;
+  using ValueType = ValueStoreTypes<IdT>::ValueType;
+  using ConstRefType = ValueStoreTypes<IdT>::ConstRefType;
 
   // Given the related ID and a value, stores the value and returns a mapped ID
   // to reference it in the store.

+ 1 - 0
toolchain/parse/BUILD

@@ -156,6 +156,7 @@ cc_library(
         "//common:find",
         "//common:ostream",
         "//common:struct_reflection",
+        "//toolchain/base:fixed_size_value_store",
         "//toolchain/base:value_store",
         "//toolchain/lex:token_index",
         "//toolchain/lex:tokenized_buffer",

+ 13 - 14
toolchain/parse/tree_and_subtrees.cpp

@@ -7,15 +7,16 @@
 #include <tuple>
 #include <utility>
 
+#include "toolchain/base/fixed_size_value_store.h"
 #include "toolchain/lex/token_index.h"
 
 namespace Carbon::Parse {
 
 TreeAndSubtrees::TreeAndSubtrees(const Lex::TokenizedBuffer& tokens,
                                  const Tree& tree)
-    : tokens_(&tokens), tree_(&tree) {
-  subtree_sizes_.reserve(tree_->size());
-
+    : tokens_(&tokens),
+      tree_(&tree),
+      subtree_sizes_(SubtreeSizeStore::MakeForOverwrite(tree_->size())) {
   // A stack of nodes which haven't yet been used as children.
   llvm::SmallVector<NodeId> size_stack;
   for (auto n : tree.postorder()) {
@@ -31,7 +32,7 @@ TreeAndSubtrees::TreeAndSubtrees(const Lex::TokenizedBuffer& tokens,
       for (auto i : llvm::seq(kind.child_count())) {
         auto child = size_stack.pop_back_val();
         CARBON_CHECK(static_cast<size_t>(child.index) < subtree_sizes_.size());
-        size += subtree_sizes_[child.index];
+        size += subtree_sizes_.Get(child);
         if (kind.has_bracket() && i == kind.child_count() - 1) {
           CARBON_CHECK(kind.bracket() == tree.node_kind(child),
                        "Node {0} with child count {1} needs bracket {2}, found "
@@ -45,28 +46,26 @@ TreeAndSubtrees::TreeAndSubtrees(const Lex::TokenizedBuffer& tokens,
         CARBON_CHECK(!size_stack.empty(), "Node {0} is missing bracket {1}",
                      kind, kind.bracket());
         auto child = size_stack.pop_back_val();
-        size += subtree_sizes_[child.index];
+        size += subtree_sizes_.Get(child);
         if (kind.bracket() == tree.node_kind(child)) {
           break;
         }
       }
     }
     size_stack.push_back(n);
-    subtree_sizes_.push_back(size);
+    subtree_sizes_.Set(n, size);
   }
 
-  CARBON_CHECK(static_cast<int>(subtree_sizes_.size()) == tree_->size());
-
   // Remaining nodes should all be roots in the tree; make sure they line up.
   CARBON_CHECK(
       size_stack.back().index == static_cast<int32_t>(tree_->size()) - 1,
       "{0} {1}", size_stack.back(), tree_->size() - 1);
   int prev_index = -1;
   for (const auto& n : size_stack) {
-    CARBON_CHECK(n.index - subtree_sizes_[n.index] == prev_index,
+    CARBON_CHECK(n.index - subtree_sizes_.Get(n) == prev_index,
                  "NodeId {0} is a root {1} with subtree_size {2}, but previous "
                  "root was at {3}.",
-                 n, tree_->node_kind(n), subtree_sizes_[n.index], prev_index);
+                 n, tree_->node_kind(n), subtree_sizes_.Get(n), prev_index);
     prev_index = n.index;
   }
 }
@@ -117,14 +116,14 @@ auto TreeAndSubtrees::postorder(NodeId n) const
     -> llvm::iterator_range<Tree::PostorderIterator> {
   // The postorder ends after this node, the root, and begins at the begin of
   // its subtree.
-  int begin_index = n.index - subtree_sizes_[n.index] + 1;
+  int begin_index = n.index - subtree_sizes_.Get(n) + 1;
   return Tree::PostorderIterator::MakeRange(NodeId(begin_index), n);
 }
 
 auto TreeAndSubtrees::children(NodeId n) const
     -> llvm::iterator_range<SiblingIterator> {
   CARBON_CHECK(n.has_value());
-  int end_index = n.index - subtree_sizes_[n.index];
+  int end_index = n.index - subtree_sizes_.Get(n);
   return llvm::iterator_range<SiblingIterator>(
       SiblingIterator(*this, NodeId(n.index - 1)),
       SiblingIterator(*this, NodeId(end_index)));
@@ -153,8 +152,8 @@ auto TreeAndSubtrees::PrintNode(llvm::raw_ostream& output, NodeId n, int depth,
     output << ", has_error: yes";
   }
 
-  if (subtree_sizes_[n.index] > 1) {
-    output << ", subtree_size: " << subtree_sizes_[n.index];
+  if (subtree_sizes_.Get(n) > 1) {
+    output << ", subtree_size: " << subtree_sizes_.Get(n);
     if (preorder) {
       output << ", children: [\n";
       return true;

+ 4 - 2
toolchain/parse/tree_and_subtrees.h

@@ -6,6 +6,7 @@
 #define CARBON_TOOLCHAIN_PARSE_TREE_AND_SUBTREES_H_
 
 #include "llvm/ADT/SmallVector.h"
+#include "toolchain/base/fixed_size_value_store.h"
 #include "toolchain/lex/token_index.h"
 #include "toolchain/parse/tree.h"
 
@@ -184,7 +185,8 @@ class TreeAndSubtrees {
   // first child of its parent, this will be an offset to the node's parent's
   // next sibling, or if it the parent is also a first child, the grandparent's
   // next sibling, and so on.
-  llvm::SmallVector<int32_t> subtree_sizes_;
+  using SubtreeSizeStore = FixedSizeValueStore<NodeId, int32_t>;
+  SubtreeSizeStore subtree_sizes_;
 };
 
 // A standard signature for a callback to support lazy construction.
@@ -218,7 +220,7 @@ class TreeAndSubtrees::SiblingIterator
 
   using iterator_facade_base::operator++;
   auto operator++() -> SiblingIterator& {
-    node_.index -= tree_->subtree_sizes_[node_.index];
+    node_.index -= tree_->subtree_sizes_.Get(node_);
     return *this;
   }
 

+ 1 - 0
toolchain/sem_ir/BUILD

@@ -184,6 +184,7 @@ cc_library(
         ":typed_insts",
         "//common:concepts",
         "//common:ostream",
+        "//toolchain/base:fixed_size_value_store",
         "//toolchain/base:kind_switch",
         "//toolchain/base:shared_value_stores",
         "//toolchain/lex:tokenized_buffer",

+ 13 - 13
toolchain/sem_ir/formatter.cpp

@@ -11,6 +11,7 @@
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include "toolchain/base/fixed_size_value_store.h"
 #include "toolchain/base/kind_switch.h"
 #include "toolchain/base/shared_value_stores.h"
 #include "toolchain/lex/tokenized_buffer.h"
@@ -40,12 +41,10 @@ Formatter::Formatter(const File* sem_ir,
       inst_namer_(sem_ir_),
       get_tree_and_subtrees_(get_tree_and_subtrees),
       include_ir_in_dumps_(include_ir_in_dumps),
-      use_dump_sem_ir_ranges_(use_dump_sem_ir_ranges) {
-  // Create a placeholder visible chunk and assign it to all instructions that
-  // don't have a chunk of their own.
-  auto first_chunk = AddChunkNoFlush(true);
-  tentative_inst_chunks_.resize(sem_ir_->insts().size(), first_chunk);
-
+      use_dump_sem_ir_ranges_(use_dump_sem_ir_ranges),
+      // Create a placeholder visible chunk and assign it to all instructions
+      // that don't have a chunk of their own.
+      tentative_inst_chunks_(sem_ir_->insts().size(), AddChunkNoFlush(true)) {
   if (use_dump_sem_ir_ranges_) {
     ComputeNodeParents();
   }
@@ -54,7 +53,7 @@ Formatter::Formatter(const File* sem_ir,
   for (auto inst_id : llvm::concat<const InstId>(
            sem_ir_->constants().array_ref(),
            sem_ir_->inst_blocks().Get(InstBlockId::Imports))) {
-    tentative_inst_chunks_[inst_id.index] = AddChunkNoFlush(false);
+    tentative_inst_chunks_.Set(inst_id, AddChunkNoFlush(false));
   }
 
   // Create a real chunk for the start of the output.
@@ -103,11 +102,12 @@ auto Formatter::Format() -> void {
 }
 
 auto Formatter::ComputeNodeParents() -> void {
-  CARBON_CHECK(node_parents_.empty());
-  node_parents_.resize(sem_ir_->parse_tree().size(), Parse::NodeId::None);
+  CARBON_CHECK(!node_parents_);
+  node_parents_ =
+      NodeParentStore(sem_ir_->parse_tree().size(), Parse::NodeId::None);
   for (auto n : sem_ir_->parse_tree().postorder()) {
     for (auto child : get_tree_and_subtrees_().children(n)) {
-      node_parents_[child.index] = n;
+      node_parents_->Set(child, n);
     }
   }
 }
@@ -215,7 +215,7 @@ auto Formatter::ShouldFormatEntity(InstId decl_id) -> bool {
   // body.
   auto end_node_id = loc_id.node_id();
   if (IsDefinitionStart(sem_ir_->parse_tree().node_kind(end_node_id))) {
-    end_node_id = node_parents_[end_node_id.index];
+    end_node_id = node_parents_->Get(end_node_id);
   }
 
   Lex::InclusiveTokenRange range = {
@@ -313,7 +313,7 @@ auto Formatter::FormatTopLevelScopeIfUsed(InstNamer::ScopeId scope_id,
     if (use_tentative_output_scopes) {
       // This is for constants and imports. These use tentative logic to
       // determine whether an instruction is printed.
-      TentativeOutputScope scope(*this, tentative_inst_chunks_[inst_id.index]);
+      TentativeOutputScope scope(*this, tentative_inst_chunks_.Get(inst_id));
       FormatInst(inst_id);
     } else if (ShouldFormatInst(inst_id)) {
       // This is for the file scope. It uses only the range-based filtering.
@@ -1336,7 +1336,7 @@ auto Formatter::FormatName(NameId id) -> void {
 
 auto Formatter::FormatName(InstId id) -> void {
   if (id.has_value()) {
-    IncludeChunkInOutput(tentative_inst_chunks_[id.index]);
+    IncludeChunkInOutput(tentative_inst_chunks_.Get(id));
   }
   out_ << inst_namer_.GetNameFor(scope_, id);
 }

+ 5 - 4
toolchain/sem_ir/formatter.h

@@ -9,6 +9,7 @@
 
 #include "common/concepts.h"
 #include "llvm/Support/raw_ostream.h"
+#include "toolchain/base/fixed_size_value_store.h"
 #include "toolchain/parse/tree_and_subtrees.h"
 #include "toolchain/sem_ir/file.h"
 #include "toolchain/sem_ir/inst_namer.h"
@@ -375,13 +376,13 @@ class Formatter {
   llvm::StringRef pending_imported_from_;
 
   // Indexes of chunks of output that should be included when an instruction is
-  // referenced, indexed by the instruction's index. This is resized in advance
-  // to the correct size.
-  llvm::SmallVector<size_t, 0> tentative_inst_chunks_;
+  // referenced, indexed by the instruction's index.
+  FixedSizeValueStore<InstId, size_t> tentative_inst_chunks_;
 
   // Maps nodes to their parents. Only set when dump ranges are in use, because
   // the parents aren't used otherwise.
-  llvm::SmallVector<Parse::NodeId> node_parents_;
+  using NodeParentStore = FixedSizeValueStore<Parse::NodeId, Parse::NodeId>;
+  std::optional<NodeParentStore> node_parents_;
 };
 
 template <typename IdT>