value_store.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  5. #define CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  6. #include <memory>
  7. #include <type_traits>
  8. #include "common/check.h"
  9. #include "common/hashtable_key_context.h"
  10. #include "common/ostream.h"
  11. #include "common/set.h"
  12. #include "llvm/ADT/STLExtras.h"
  13. #include "llvm/ADT/Sequence.h"
  14. #include "llvm/ADT/SmallVector.h"
  15. #include "llvm/ADT/iterator_range.h"
  16. #include "llvm/Support/Compiler.h"
  17. #include "toolchain/base/mem_usage.h"
  18. #include "toolchain/base/yaml.h"
  19. namespace Carbon {
  20. namespace Internal {
  21. // Used as a parent class for non-printable types. This is just for
  22. // std::conditional, not as an API.
  23. class ValueStoreNotPrintable {};
  24. } // namespace Internal
  25. // Setup our compile time condition controlling poisoning of value stores. This
  26. // is set to one by the Bazel flag `--features=poison_value_stores`.
  27. //
  28. // TODO: Eventually, this will always enabled when ASan is enabled, but we can't
  29. // do that until we clean up all of the latent bugs.
  30. #ifndef CARBON_POISON_VALUE_STORES
  31. #define CARBON_POISON_VALUE_STORES 0
  32. #elif !LLVM_ADDRESS_SANITIZER_BUILD
  33. #error "CARBON_POISON_VALUE_STORES requires address sanitizer"
  34. #endif
  35. // A simple wrapper for accumulating values, providing IDs to later retrieve the
  36. // value. This does not do deduplication.
  37. //
  38. // IdT::ValueType must represent the type being indexed.
  39. template <typename IdT>
  40. class ValueStore
  41. : public std::conditional<
  42. std::is_base_of_v<Printable<typename IdT::ValueType>,
  43. typename IdT::ValueType>,
  44. Yaml::Printable<ValueStore<IdT>>, Internal::ValueStoreNotPrintable> {
  45. public:
  46. using ValueType = typename IdT::ValueType;
  47. // Typically we want to use `ValueType&` and `const ValueType& to avoid
  48. // copies, but when the value type is a `StringRef`, we assume external
  49. // storage for the string data and both our value type and ref type will be
  50. // `StringRef`. This will preclude mutation of the string data.
  51. using RefType = std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
  52. llvm::StringRef, ValueType&>;
  53. using ConstRefType =
  54. std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
  55. llvm::StringRef, const ValueType&>;
  56. ValueStore() = default;
  57. ValueStore(ValueStore&& other) noexcept
  58. : values_((other.UnpoisonAll(), std::move(other.values_)))
  59. #if CARBON_POISON_VALUE_STORES
  60. ,
  61. all_poisoned_(false)
  62. #endif
  63. {
  64. PoisonAll();
  65. }
  66. auto operator=(ValueStore&& other) noexcept -> ValueStore& {
  67. UnpoisonAll();
  68. other.UnpoisonAll();
  69. values_ = std::move(other.values_);
  70. #if CARBON_POISON_VALUE_STORES
  71. all_poisoned_ = false;
  72. #endif
  73. PoisonAll();
  74. return *this;
  75. }
  76. ~ValueStore() { UnpoisonAll(); }
  77. // Stores the value and returns an ID to reference it.
  78. auto Add(ValueType value) -> IdT {
  79. IdT id(values_.size());
  80. // This routine is especially hot and the check here relatively expensive
  81. // for the value provided, so only do this in debug builds to make tracking
  82. // down issues easier.
  83. CARBON_DCHECK(id.index >= 0, "Id overflow");
  84. bool realloc = values_.capacity() == values_.size();
  85. if (realloc) {
  86. // Unpoison everything if the push will reallocate, in order to allow the
  87. // vector to make a copy of the elements.
  88. UnpoisonAll();
  89. } else {
  90. PoisonAll();
  91. }
  92. values_.push_back(std::move(value));
  93. if (realloc) {
  94. PoisonAll();
  95. } else {
  96. PoisonElement(id.index);
  97. }
  98. return id;
  99. }
  100. // Returns a mutable value for an ID.
  101. auto Get(IdT id) -> RefType {
  102. CARBON_DCHECK(id.index >= 0, "{0}", id);
  103. UnpoisonElement(id.index);
  104. return values_[id.index];
  105. }
  106. // Returns the value for an ID.
  107. auto Get(IdT id) const -> ConstRefType {
  108. CARBON_DCHECK(id.index >= 0, "{0}", id);
  109. UnpoisonElement(id.index);
  110. return values_[id.index];
  111. }
  112. // Reserves space.
  113. auto Reserve(size_t size) -> void {
  114. UnpoisonAll();
  115. values_.reserve(size);
  116. PoisonAll();
  117. }
  118. // Invalidates all current pointers and references into the value store. Used
  119. // in debug builds to trigger use-after-invalidation bugs.
  120. auto Invalidate() -> void { PoisonAll(); }
  121. // These are to support printable structures, and are not guaranteed.
  122. auto OutputYaml() const -> Yaml::OutputMapping {
  123. UnpoisonAll();
  124. return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
  125. for (auto [id, value] : enumerate()) {
  126. map.Add(PrintToString(id), Yaml::OutputScalar(value));
  127. }
  128. });
  129. }
  130. // Collects memory usage of the values.
  131. auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
  132. -> void {
  133. UnpoisonAll();
  134. mem_usage.Collect(label.str(), values_);
  135. }
  136. auto array_ref() const -> llvm::ArrayRef<ValueType> {
  137. UnpoisonAll();
  138. return values_;
  139. }
  140. auto size() const -> size_t { return values_.size(); }
  141. // Makes an iterable range over pairs of the index and a reference to the
  142. // value for each value in the store.
  143. //
  144. // The range is over references to the values in the store, even if used with
  145. // `auto` to destructure the pair. In this example, the `value` is a
  146. // `ConstRefType`:
  147. // ```
  148. // for (auto [id, value] : store.enumerate()) { ... }
  149. // ```
  150. auto enumerate() const -> auto {
  151. UnpoisonAll();
  152. auto index_to_id = [](auto pair) -> std::pair<IdT, ConstRefType> {
  153. auto [index, value] = pair;
  154. return std::pair<IdT, ConstRefType>(IdT(index), value);
  155. };
  156. return llvm::map_range(llvm::enumerate(values_), index_to_id);
  157. }
  158. private:
  159. // Poison the entire contents of the value store. This is used to detect cases
  160. // where references to elements in a value store are used across calls that
  161. // might modify the store.
  162. auto PoisonAll() const -> void {
  163. #if CARBON_POISON_VALUE_STORES
  164. if (!all_poisoned_) {
  165. __asan_poison_memory_region(values_.data(),
  166. values_.size() * sizeof(values_[0]));
  167. all_poisoned_ = true;
  168. }
  169. #endif
  170. }
  171. // Unpoison the entire contents of the value store.
  172. auto UnpoisonAll() const -> void {
  173. #if CARBON_POISON_VALUE_STORES
  174. __asan_unpoison_memory_region(values_.data(),
  175. values_.size() * sizeof(values_[0]));
  176. all_poisoned_ = false;
  177. #endif
  178. }
  179. // Poison a single element.
  180. auto PoisonElement([[maybe_unused]] int element) const -> void {
  181. #if CARBON_POISON_VALUE_STORES
  182. __asan_unpoison_memory_region(values_.data() + element, sizeof(values_[0]));
  183. #endif
  184. }
  185. // Unpoison a single element.
  186. auto UnpoisonElement([[maybe_unused]] int element) const -> void {
  187. #if CARBON_POISON_VALUE_STORES
  188. __asan_unpoison_memory_region(values_.data() + element, sizeof(values_[0]));
  189. all_poisoned_ = false;
  190. #endif
  191. }
  192. // Set inline size to 0 because these will typically be too large for the
  193. // stack, while this does make File smaller.
  194. llvm::SmallVector<std::decay_t<ValueType>, 0> values_;
  195. #if CARBON_POISON_VALUE_STORES
  196. // Whether the vector is currently fully poisoned.
  197. //
  198. // We use this to avoid repeated re-poisoning of the entire store. Doing so is
  199. // linear in the size of the store, and we trigger re-poisoning frequently,
  200. // for example on each import. Tracking that here allows us to coalesce these
  201. // into a single linear operation.
  202. mutable bool all_poisoned_ = true;
  203. #endif
  204. };
  205. // A wrapper for accumulating immutable values with deduplication, providing IDs
  206. // to later retrieve the value.
  207. //
  208. // IdT::ValueType must represent the type being indexed.
  209. template <typename IdT>
  210. class CanonicalValueStore {
  211. public:
  212. using ValueType = typename IdT::ValueType;
  213. using RefType = typename ValueStore<IdT>::RefType;
  214. using ConstRefType = typename ValueStore<IdT>::ConstRefType;
  215. // Stores a canonical copy of the value and returns an ID to reference it.
  216. auto Add(ValueType value) -> IdT;
  217. // Returns the value for an ID.
  218. auto Get(IdT id) const -> ConstRefType { return values_.Get(id); }
  219. // Looks up the canonical ID for a value, or returns `None` if not in the
  220. // store.
  221. auto Lookup(ValueType value) const -> IdT;
  222. // Reserves space.
  223. auto Reserve(size_t size) -> void;
  224. // Invalidates all current pointers and references into the value store. Used
  225. // in debug builds to trigger use-after-invalidation bugs.
  226. auto Invalidate() -> void { values_.Invalidate(); }
  227. // These are to support printable structures, and are not guaranteed.
  228. auto OutputYaml() const -> Yaml::OutputMapping {
  229. return values_.OutputYaml();
  230. }
  231. auto array_ref() const -> llvm::ArrayRef<ValueType> {
  232. return values_.array_ref();
  233. }
  234. auto size() const -> size_t { return values_.size(); }
  235. // Collects memory usage of the values and deduplication set.
  236. auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
  237. -> void {
  238. mem_usage.Collect(MemUsage::ConcatLabel(label, "values_"), values_);
  239. auto bytes =
  240. set_.ComputeMetrics(KeyContext(values_.array_ref())).storage_bytes;
  241. mem_usage.Add(MemUsage::ConcatLabel(label, "set_"), bytes, bytes);
  242. }
  243. private:
  244. class KeyContext;
  245. ValueStore<IdT> values_;
  246. Set<IdT, /*SmallSize=*/0, KeyContext> set_;
  247. };
  248. template <typename IdT>
  249. class CanonicalValueStore<IdT>::KeyContext
  250. : public TranslatingKeyContext<KeyContext> {
  251. public:
  252. explicit KeyContext(llvm::ArrayRef<ValueType> values) : values_(values) {}
  253. // Note that it is safe to return a `const` reference here as the underlying
  254. // object's lifetime is provided by the `store_`.
  255. auto TranslateKey(IdT id) const -> const ValueType& {
  256. return values_[id.index];
  257. }
  258. private:
  259. llvm::ArrayRef<ValueType> values_;
  260. };
  261. template <typename IdT>
  262. auto CanonicalValueStore<IdT>::Add(ValueType value) -> IdT {
  263. auto make_key = [&] { return IdT(values_.Add(std::move(value))); };
  264. return set_.Insert(value, make_key, KeyContext(values_.array_ref())).key();
  265. }
  266. template <typename IdT>
  267. auto CanonicalValueStore<IdT>::Lookup(ValueType value) const -> IdT {
  268. if (auto result = set_.Lookup(value, KeyContext(values_.array_ref()))) {
  269. return result.key();
  270. }
  271. return IdT::None;
  272. }
  273. template <typename IdT>
  274. auto CanonicalValueStore<IdT>::Reserve(size_t size) -> void {
  275. // Compute the resulting new insert count using the size of values -- the
  276. // set doesn't have a fast to compute current size.
  277. if (size > values_.size()) {
  278. set_.GrowForInsertCount(size - values_.size(),
  279. KeyContext(values_.array_ref()));
  280. }
  281. values_.Reserve(size);
  282. }
  283. // A ValueStore that builds a 1:1 relationship between two IDs.
  284. // * `RelatedIdT` represents a related ID that can be used to find values in the
  285. // store.
  286. // * `IdT` is the actual ID of values in this store, and `IdT::ValueType` is the
  287. // value type being stored.
  288. //
  289. // The value store builds a mapping so that either ID can be used later to find
  290. // a value. And the user can query if a related `RelatedIdT` has been used to
  291. // add a value to the store or not.
  292. //
  293. // When adding to the store, the user provides the related `RelatedIdT` along
  294. // with the value being stored, and gets back the ID of the value in the store.
  295. //
  296. // This store requires more storage space than normal ValueStore does, as it
  297. // requires storing a bit for presence of each `RelatedIdT`. And it allocates
  298. // memory for values for all IDs up largest ID present in the store, even if
  299. // they are not yet used.
  300. template <typename RelatedIdT, typename IdT>
  301. class RelationalValueStore {
  302. public:
  303. using ValueType = IdT::ValueType;
  304. using ConstRefType = ValueStore<IdT>::ConstRefType;
  305. // Given the related ID and a value, stores the value and returns a mapped ID
  306. // to reference it in the store.
  307. auto Add(RelatedIdT related_id, ValueType value) -> IdT {
  308. CARBON_DCHECK(related_id.index >= 0, "{0}", related_id);
  309. IdT id(related_id.index);
  310. if (static_cast<size_t>(id.index) >= values_.size()) {
  311. values_.resize(id.index + 1);
  312. }
  313. auto& opt = values_[id.index];
  314. CARBON_CHECK(!opt.has_value(),
  315. "Add with `related_id` that was already added to the store");
  316. opt.emplace(std::move(value));
  317. return id;
  318. }
  319. // Returns the ID of a value in the store if the `related_id` was previously
  320. // used to add a value to the store, or None.
  321. auto TryGetId(RelatedIdT related_id) const -> IdT {
  322. CARBON_DCHECK(related_id.index >= 0, "{0}", related_id);
  323. if (static_cast<size_t>(related_id.index) >= values_.size()) {
  324. return IdT::None;
  325. }
  326. auto& opt = values_[related_id.index];
  327. if (!opt.has_value()) {
  328. return IdT::None;
  329. }
  330. return IdT(related_id.index);
  331. }
  332. // Returns a value for an ID.
  333. auto Get(IdT id) const -> ConstRefType {
  334. CARBON_DCHECK(id.index >= 0, "{0}", id);
  335. return *values_[id.index];
  336. }
  337. private:
  338. // Set inline size to 0 because these will typically be too large for the
  339. // stack, while this does make File smaller.
  340. llvm::SmallVector<std::optional<std::decay_t<ValueType>>, 0> values_;
  341. };
  342. } // namespace Carbon
  343. #endif // CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_