Explorar el Código

Add build option `--features=poison_value_stores`. (#5438)

With this enabled, entities that live in value stores are poisoned
whenever any action is taken that might invalidate pointers and
references to those entities -- in particular, adding another item to
that value store, or attempting to load any entity from an import IR.
Subsequent uses of those pointers or references then trigger an ASan
failure.

This detects latent bugs where the pointer or reference to the entity
would become stale if we got unlucky about when the value store
reallocates, even in cases where the reallocation didn't actually
happen.

This is not enabled by default: it finds a lot of latent bugs, so our
tests don't pass with this option. This PR also includes fixes for a few
of those bugs.

---------

Co-authored-by: Chandler Carruth <chandlerc@gmail.com>
Co-authored-by: Carbon Infra Bot <carbon-external-infra@google.com>
Richard Smith hace 11 meses
padre
commit
71715263ce

+ 17 - 0
bazel/cc_toolchains/clang_cc_toolchain_config.bzl

@@ -643,6 +643,22 @@ def _impl(ctx):
         ],
         ],
     )
     )
 
 
+    # A feature that enables poisoning of value stores to detect use after
+    # potential reallocation bugs.
+    #
+    # TODO: Remove this and leave poisoning always on once these bugs are
+    # fixed.
+    poison_value_stores = feature(
+        name = "poison_value_stores",
+        requires = [feature_set(["asan"])],
+        flag_sets = [flag_set(
+            actions = all_compile_actions,
+            flag_groups = [flag_group(flags = [
+                "-DCARBON_POISON_VALUE_STORES=1",
+            ])],
+        )],
+    )
+
     fuzzer = feature(
     fuzzer = feature(
         name = "fuzzer",
         name = "fuzzer",
         flag_sets = [flag_set(
         flag_sets = [flag_set(
@@ -1146,6 +1162,7 @@ def _impl(ctx):
         asan,
         asan,
         asan_min_size,
         asan_min_size,
         enable_in_fastbuild,
         enable_in_fastbuild,
+        poison_value_stores,
         fuzzer,
         fuzzer,
         layering_check,
         layering_check,
         module_maps,
         module_maps,

+ 117 - 2
toolchain/base/value_store.h

@@ -16,6 +16,7 @@
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Compiler.h"
 #include "toolchain/base/mem_usage.h"
 #include "toolchain/base/mem_usage.h"
 #include "toolchain/base/yaml.h"
 #include "toolchain/base/yaml.h"
 
 
@@ -28,6 +29,17 @@ namespace Internal {
 class ValueStoreNotPrintable {};
 class ValueStoreNotPrintable {};
 }  // namespace Internal
 }  // namespace Internal
 
 
+// Set up our compile-time condition controlling poisoning of value stores. This
+// is set to one by the Bazel flag `--features=poison_value_stores`.
+//
+// TODO: Eventually, this will always be enabled when ASan is enabled, but we
+// do that until we clean up all of the latent bugs.
+#ifndef CARBON_POISON_VALUE_STORES
+#define CARBON_POISON_VALUE_STORES 0
+#elif !LLVM_ADDRESS_SANITIZER_BUILD
+#error "CARBON_POISON_VALUE_STORES requires address sanitizer"
+#endif
+
 // A simple wrapper for accumulating values, providing IDs to later retrieve the
 // A simple wrapper for accumulating values, providing IDs to later retrieve the
 // value. This does not do deduplication.
 // value. This does not do deduplication.
 //
 //
@@ -51,6 +63,28 @@ class ValueStore
       std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
       std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
                          llvm::StringRef, const ValueType&>;
                          llvm::StringRef, const ValueType&>;
 
 
+  ValueStore() = default;
+  ValueStore(ValueStore&& other) noexcept
+      : values_((other.UnpoisonAll(), std::move(other.values_)))
+#if CARBON_POISON_VALUE_STORES
+        ,
+        all_poisoned_(false)
+#endif
+  {
+    PoisonAll();
+  }
+  auto operator=(ValueStore&& other) noexcept -> ValueStore& {
+    UnpoisonAll();
+    other.UnpoisonAll();
+    values_ = std::move(other.values_);
+#if CARBON_POISON_VALUE_STORES
+    all_poisoned_ = false;
+#endif
+    PoisonAll();
+    return *this;
+  }
+  ~ValueStore() { UnpoisonAll(); }
+
   // Stores the value and returns an ID to reference it.
   // Stores the value and returns an ID to reference it.
   auto Add(ValueType value) -> IdT {
   auto Add(ValueType value) -> IdT {
     IdT id(values_.size());
     IdT id(values_.size());
@@ -58,27 +92,55 @@ class ValueStore
     // for the value provided, so only do this in debug builds to make tracking
     // for the value provided, so only do this in debug builds to make tracking
     // down issues easier.
     // down issues easier.
     CARBON_DCHECK(id.index >= 0, "Id overflow");
     CARBON_DCHECK(id.index >= 0, "Id overflow");
+
+    bool realloc = values_.capacity() == values_.size();
+    if (realloc) {
+      // Unpoison everything if the push will reallocate, in order to allow the
+      // vector to make a copy of the elements.
+      UnpoisonAll();
+    } else {
+      PoisonAll();
+    }
+
     values_.push_back(std::move(value));
     values_.push_back(std::move(value));
+
+    if (realloc) {
+      PoisonAll();
+    } else {
+      PoisonElement(id.index);
+    }
+
     return id;
     return id;
   }
   }
 
 
   // Returns a mutable value for an ID.
   // Returns a mutable value for an ID.
   auto Get(IdT id) -> RefType {
   auto Get(IdT id) -> RefType {
     CARBON_DCHECK(id.index >= 0, "{0}", id);
     CARBON_DCHECK(id.index >= 0, "{0}", id);
+    UnpoisonElement(id.index);
     return values_[id.index];
     return values_[id.index];
   }
   }
 
 
   // Returns the value for an ID.
   // Returns the value for an ID.
   auto Get(IdT id) const -> ConstRefType {
   auto Get(IdT id) const -> ConstRefType {
     CARBON_DCHECK(id.index >= 0, "{0}", id);
     CARBON_DCHECK(id.index >= 0, "{0}", id);
+    UnpoisonElement(id.index);
     return values_[id.index];
     return values_[id.index];
   }
   }
 
 
   // Reserves space.
   // Reserves space.
-  auto Reserve(size_t size) -> void { values_.reserve(size); }
+  auto Reserve(size_t size) -> void {
+    UnpoisonAll();
+    values_.reserve(size);
+    PoisonAll();
+  }
+
+  // Invalidates all current pointers and references into the value store. Used
+  // in debug builds to trigger use-after-invalidation bugs.
+  auto Invalidate() -> void { PoisonAll(); }
 
 
   // These are to support printable structures, and are not guaranteed.
   // These are to support printable structures, and are not guaranteed.
   auto OutputYaml() const -> Yaml::OutputMapping {
   auto OutputYaml() const -> Yaml::OutputMapping {
+    UnpoisonAll();
     return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
     return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
       for (auto [id, value] : enumerate()) {
       for (auto [id, value] : enumerate()) {
         map.Add(PrintToString(id), Yaml::OutputScalar(value));
         map.Add(PrintToString(id), Yaml::OutputScalar(value));
@@ -89,10 +151,14 @@ class ValueStore
   // Collects memory usage of the values.
   // Collects memory usage of the values.
   auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
   auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
       -> void {
       -> void {
+    UnpoisonAll();
     mem_usage.Collect(label.str(), values_);
     mem_usage.Collect(label.str(), values_);
   }
   }
 
 
-  auto array_ref() const -> llvm::ArrayRef<ValueType> { return values_; }
+  auto array_ref() const -> llvm::ArrayRef<ValueType> {
+    UnpoisonAll();
+    return values_;
+  }
   auto size() const -> size_t { return values_.size(); }
   auto size() const -> size_t { return values_.size(); }
 
 
   // Makes an iterable range over pairs of the index and a reference to the
   // Makes an iterable range over pairs of the index and a reference to the
@@ -105,6 +171,7 @@ class ValueStore
   // for (auto [id, value] : store.enumerate()) { ... }
   // for (auto [id, value] : store.enumerate()) { ... }
   // ```
   // ```
   auto enumerate() const -> auto {
   auto enumerate() const -> auto {
+    UnpoisonAll();
     auto index_to_id = [](auto pair) -> std::pair<IdT, ConstRefType> {
     auto index_to_id = [](auto pair) -> std::pair<IdT, ConstRefType> {
       auto [index, value] = pair;
       auto [index, value] = pair;
       return std::pair<IdT, ConstRefType>(IdT(index), value);
       return std::pair<IdT, ConstRefType>(IdT(index), value);
@@ -113,9 +180,53 @@ class ValueStore
   }
   }
 
 
  private:
  private:
+  // Poison the entire contents of the value store. This is used to detect cases
+  // where references to elements in a value store are used across calls that
+  // might modify the store.
+  auto PoisonAll() const -> void {
+#if CARBON_POISON_VALUE_STORES
+    if (!all_poisoned_) {
+      __asan_poison_memory_region(values_.data(),
+                                  values_.size() * sizeof(values_[0]));
+      all_poisoned_ = true;
+    }
+#endif
+  }
+  // Unpoison the entire contents of the value store.
+  auto UnpoisonAll() const -> void {
+#if CARBON_POISON_VALUE_STORES
+    __asan_unpoison_memory_region(values_.data(),
+                                  values_.size() * sizeof(values_[0]));
+    all_poisoned_ = false;
+#endif
+  }
+  // Poison a single element.
+  auto PoisonElement([[maybe_unused]] int element) const -> void {
+#if CARBON_POISON_VALUE_STORES
+    __asan_poison_memory_region(values_.data() + element, sizeof(values_[0]));
+#endif
+  }
+  // Unpoison a single element.
+  auto UnpoisonElement([[maybe_unused]] int element) const -> void {
+#if CARBON_POISON_VALUE_STORES
+    __asan_unpoison_memory_region(values_.data() + element, sizeof(values_[0]));
+    all_poisoned_ = false;
+#endif
+  }
+
   // Set inline size to 0 because these will typically be too large for the
   // Set inline size to 0 because these will typically be too large for the
   // stack, while this does make File smaller.
   // stack, while this does make File smaller.
   llvm::SmallVector<std::decay_t<ValueType>, 0> values_;
   llvm::SmallVector<std::decay_t<ValueType>, 0> values_;
+
+#if CARBON_POISON_VALUE_STORES
+  // Whether the vector is currently fully poisoned.
+  //
+  // We use this to avoid repeated re-poisoning of the entire store. Doing so is
+  // linear in the size of the store, and we trigger re-poisoning frequently,
+  // for example on each import. Tracking that here allows us to coalesce these
+  // into a single linear operation.
+  mutable bool all_poisoned_ = true;
+#endif
 };
 };
 
 
 // A wrapper for accumulating immutable values with deduplication, providing IDs
 // A wrapper for accumulating immutable values with deduplication, providing IDs
@@ -142,6 +253,10 @@ class CanonicalValueStore {
   // Reserves space.
   // Reserves space.
   auto Reserve(size_t size) -> void;
   auto Reserve(size_t size) -> void;
 
 
+  // Invalidates all current pointers and references into the value store. Used
+  // in debug builds to trigger use-after-invalidation bugs.
+  auto Invalidate() -> void { values_.Invalidate(); }
+
   // These are to support printable structures, and are not guaranteed.
   // These are to support printable structures, and are not guaranteed.
   auto OutputYaml() const -> Yaml::OutputMapping {
   auto OutputYaml() const -> Yaml::OutputMapping {
     return values_.OutputYaml();
     return values_.OutputYaml();

+ 15 - 8
toolchain/check/convert.cpp

@@ -567,13 +567,21 @@ static auto ConvertStructToClass(
     SemIR::InstId value_id, ConversionTarget target,
     SemIR::InstId value_id, ConversionTarget target,
     SemIR::InstId dest_vtable_id = SemIR::InstId::None) -> SemIR::InstId {
     SemIR::InstId dest_vtable_id = SemIR::InstId::None) -> SemIR::InstId {
   PendingBlock target_block(&context);
   PendingBlock target_block(&context);
-  auto& dest_class_info = context.classes().Get(dest_type.class_id);
-  CARBON_CHECK(dest_class_info.inheritance_kind != SemIR::Class::Abstract);
-  auto object_repr_id =
-      dest_class_info.GetObjectRepr(context.sem_ir(), dest_type.specific_id);
-  if (object_repr_id == SemIR::ErrorInst::TypeId) {
-    return SemIR::ErrorInst::InstId;
+  auto object_repr_id = SemIR::TypeId::None;
+
+  {
+    auto& dest_class_info = context.classes().Get(dest_type.class_id);
+    CARBON_CHECK(dest_class_info.inheritance_kind != SemIR::Class::Abstract);
+    if (!dest_vtable_id.has_value()) {
+      dest_vtable_id = dest_class_info.vtable_id;
+    }
+    object_repr_id =
+        dest_class_info.GetObjectRepr(context.sem_ir(), dest_type.specific_id);
+    if (object_repr_id == SemIR::ErrorInst::TypeId) {
+      return SemIR::ErrorInst::InstId;
+    }
   }
   }
+
   auto dest_struct_type =
   auto dest_struct_type =
       context.types().GetAs<SemIR::StructType>(object_repr_id);
       context.types().GetAs<SemIR::StructType>(object_repr_id);
 
 
@@ -588,8 +596,7 @@ static auto ConvertStructToClass(
   }
   }
 
 
   auto result_id = ConvertStructToStructOrClass<SemIR::ClassElementAccess>(
   auto result_id = ConvertStructToStructOrClass<SemIR::ClassElementAccess>(
-      context, src_type, dest_struct_type, value_id, target,
-      dest_vtable_id.has_value() ? dest_vtable_id : dest_class_info.vtable_id);
+      context, src_type, dest_struct_type, value_id, target, dest_vtable_id);
 
 
   if (need_temporary) {
   if (need_temporary) {
     target_block.InsertHere();
     target_block.InsertHere();

+ 18 - 15
toolchain/check/handle_class.cpp

@@ -366,10 +366,9 @@ auto HandleParseNode(Context& context, Parse::AdaptDeclId node_id) -> bool {
     return true;
     return true;
   }
   }
 
 
-  auto& class_info = context.classes().Get(parent_class_decl->class_id);
-  if (class_info.adapt_id.has_value()) {
-    DiagnoseClassSpecificDeclRepeated(context, node_id,
-                                      SemIR::LocId(class_info.adapt_id),
+  auto adapt_id = context.classes().Get(parent_class_decl->class_id).adapt_id;
+  if (adapt_id.has_value()) {
+    DiagnoseClassSpecificDeclRepeated(context, node_id, SemIR::LocId(adapt_id),
                                       Lex::TokenKind::Adapt);
                                       Lex::TokenKind::Adapt);
     return true;
     return true;
   }
   }
@@ -396,8 +395,10 @@ auto HandleParseNode(Context& context, Parse::AdaptDeclId node_id) -> bool {
   }
   }
 
 
   // Build a SemIR representation for the declaration.
   // Build a SemIR representation for the declaration.
-  class_info.adapt_id = AddInst<SemIR::AdaptDecl>(
+  adapt_id = AddInst<SemIR::AdaptDecl>(
       context, node_id, {.adapted_type_inst_id = adapted_type_inst_id});
       context, node_id, {.adapted_type_inst_id = adapted_type_inst_id});
+  auto& class_info = context.classes().Get(parent_class_decl->class_id);
+  class_info.adapt_id = adapt_id;
 
 
   // Extend the class scope with the adapted type's scope if requested.
   // Extend the class scope with the adapted type's scope if requested.
   if (introducer.modifier_set.HasAnyOf(KeywordModifierSet::Extend)) {
   if (introducer.modifier_set.HasAnyOf(KeywordModifierSet::Extend)) {
@@ -501,10 +502,9 @@ auto HandleParseNode(Context& context, Parse::BaseDeclId node_id) -> bool {
     return true;
     return true;
   }
   }
 
 
-  auto& class_info = context.classes().Get(parent_class_decl->class_id);
-  if (class_info.base_id.has_value()) {
-    DiagnoseClassSpecificDeclRepeated(context, node_id,
-                                      SemIR::LocId(class_info.base_id),
+  auto base_id = context.classes().Get(parent_class_decl->class_id).base_id;
+  if (base_id.has_value()) {
+    DiagnoseClassSpecificDeclRepeated(context, node_id, SemIR::LocId(base_id),
                                       Lex::TokenKind::Base);
                                       Lex::TokenKind::Base);
     return true;
     return true;
   }
   }
@@ -525,13 +525,16 @@ auto HandleParseNode(Context& context, Parse::BaseDeclId node_id) -> bool {
   // The `base` value in the class scope has an unbound element type. Instance
   // The `base` value in the class scope has an unbound element type. Instance
   // binding will be performed when it's found by name lookup into an instance.
   // binding will be performed when it's found by name lookup into an instance.
   auto field_type_id = GetUnboundElementType(
   auto field_type_id = GetUnboundElementType(
-      context, context.types().GetInstId(class_info.self_type_id),
+      context,
+      context.types().GetInstId(
+          context.classes().Get(parent_class_decl->class_id).self_type_id),
       base_info.inst_id);
       base_info.inst_id);
-  class_info.base_id =
-      AddInst<SemIR::BaseDecl>(context, node_id,
-                               {.type_id = field_type_id,
-                                .base_type_inst_id = base_info.inst_id,
-                                .index = SemIR::ElementIndex::None});
+  base_id = AddInst<SemIR::BaseDecl>(context, node_id,
+                                     {.type_id = field_type_id,
+                                      .base_type_inst_id = base_info.inst_id,
+                                      .index = SemIR::ElementIndex::None});
+  auto& class_info = context.classes().Get(parent_class_decl->class_id);
+  class_info.base_id = base_id;
 
 
   if (base_info.type_id != SemIR::ErrorInst::TypeId) {
   if (base_info.type_id != SemIR::ErrorInst::TypeId) {
     auto base_class_info = context.classes().Get(
     auto base_class_info = context.classes().Get(

+ 9 - 3
toolchain/check/impl_lookup.cpp

@@ -279,14 +279,20 @@ static auto GetWitnessIdForImpl(Context& context, SemIR::LocId loc_id,
     return EvalImplLookupResult::MakeNone();
     return EvalImplLookupResult::MakeNone();
   }
   }
 
 
-  LoadImportRef(context, impl.witness_id);
+  bool is_effectively_final = query_is_concrete || impl.is_final;
+  auto witness_id = impl.witness_id;
+
+  // Note that this invalidates our `impl` reference. Don't use it again after
+  // this point.
+  LoadImportRef(context, witness_id);
+
   if (specific_id.has_value()) {
   if (specific_id.has_value()) {
     // We need a definition of the specific `impl` so we can access its
     // We need a definition of the specific `impl` so we can access its
     // witness.
     // witness.
     ResolveSpecificDefinition(context, loc_id, specific_id);
     ResolveSpecificDefinition(context, loc_id, specific_id);
   }
   }
 
 
-  if (query_is_concrete || impl.is_final) {
+  if (is_effectively_final) {
     // TODO: These final results should be cached somehow. Positive (non-None)
     // TODO: These final results should be cached somehow. Positive (non-None)
     // results could be cached globally, as they can not change. But
     // results could be cached globally, as they can not change. But
     // negative results can change after a final impl is written, so
     // negative results can change after a final impl is written, so
@@ -294,7 +300,7 @@ static auto GetWitnessIdForImpl(Context& context, SemIR::LocId loc_id,
     // be invalidated by writing a final impl that would match.
     // be invalidated by writing a final impl that would match.
     return EvalImplLookupResult::MakeFinal(
     return EvalImplLookupResult::MakeFinal(
         context.constant_values().GetInstId(SemIR::GetConstantValueInSpecific(
         context.constant_values().GetInstId(SemIR::GetConstantValueInSpecific(
-            context.sem_ir(), specific_id, impl.witness_id)));
+            context.sem_ir(), specific_id, witness_id)));
   } else {
   } else {
     return EvalImplLookupResult::MakeNonFinal();
     return EvalImplLookupResult::MakeNonFinal();
   }
   }

+ 14 - 0
toolchain/check/import_ref.cpp

@@ -3277,6 +3277,20 @@ static auto GetInstForLoad(Context& context,
 
 
 // NOLINTNEXTLINE(misc-no-recursion)
 // NOLINTNEXTLINE(misc-no-recursion)
 auto LoadImportRef(Context& context, SemIR::InstId inst_id) -> void {
 auto LoadImportRef(Context& context, SemIR::InstId inst_id) -> void {
+#if LLVM_ADDRESS_SANITIZER_BUILD
+  // Under ASan, invalidate all of our value stores on any import in order to
+  // flush out bugs where pointers and references to entities are held across
+  // imports.
+  context.classes().Invalidate();
+  context.entity_names().Invalidate();
+  context.facet_types().Invalidate();
+  context.functions().Invalidate();
+  context.generics().Invalidate();
+  context.impls().Invalidate();
+  context.interfaces().Invalidate();
+  context.specifics().Invalidate();
+#endif
+
   auto inst = context.insts().TryGetAs<SemIR::ImportRefUnloaded>(inst_id);
   auto inst = context.insts().TryGetAs<SemIR::ImportRefUnloaded>(inst_id);
   if (!inst) {
   if (!inst) {
     return;
     return;

+ 4 - 0
toolchain/sem_ir/generic.h

@@ -127,6 +127,10 @@ class SpecificStore : public Yaml::Printable<SpecificStore> {
                                    : InstBlockId::Empty;
                                    : InstBlockId::Empty;
   }
   }
 
 
+  // Invalidates all current pointers and references into the value store. Used
+  // in debug builds to trigger use-after-invalidation bugs.
+  auto Invalidate() -> void { specifics_.Invalidate(); }
+
   // These are to support printable structures, and are not guaranteed.
   // These are to support printable structures, and are not guaranteed.
   auto OutputYaml() const -> Yaml::OutputMapping {
   auto OutputYaml() const -> Yaml::OutputMapping {
     return specifics_.OutputYaml();
     return specifics_.OutputYaml();

+ 4 - 0
toolchain/sem_ir/impl.h

@@ -181,6 +181,10 @@ class ImplStore {
   // Returns the value for an ID.
   // Returns the value for an ID.
   auto Get(ImplId id) const -> const Impl& { return values_.Get(id); }
   auto Get(ImplId id) const -> const Impl& { return values_.Get(id); }
 
 
+  // Invalidates all current pointers and references into the value store. Used
+  // in debug builds to trigger use-after-invalidation bugs.
+  auto Invalidate() -> void { values_.Invalidate(); }
+
   auto OutputYaml() const -> Yaml::OutputMapping {
   auto OutputYaml() const -> Yaml::OutputMapping {
     return values_.OutputYaml();
     return values_.OutputYaml();
   }
   }