value_store.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  5. #define CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  6. #include <memory>
  7. #include <type_traits>
  8. #include <utility>
  9. #include "common/check.h"
  10. #include "common/hashtable_key_context.h"
  11. #include "common/ostream.h"
  12. #include "common/set.h"
  13. #include "llvm/ADT/STLExtras.h"
  14. #include "llvm/ADT/Sequence.h"
  15. #include "llvm/ADT/SmallVector.h"
  16. #include "llvm/ADT/StringRef.h"
  17. #include "llvm/ADT/iterator_range.h"
  18. #include "llvm/Support/Compiler.h"
  19. #include "toolchain/base/mem_usage.h"
  20. #include "toolchain/base/value_store_chunk.h"
  21. #include "toolchain/base/yaml.h"
  22. namespace Carbon {
  23. namespace Internal {
  24. // Used as a parent class for non-printable types. This is just for
  25. // std::conditional, not as an API.
  26. class ValueStoreNotPrintable {};
  27. } // namespace Internal
  28. template <class IdT>
  29. class ValueStoreRange;
  30. // Common calculation for ValueStore types.
  31. template <typename IdT, typename ValueT = IdT::ValueType>
  32. class ValueStoreTypes {
  33. public:
  34. using ValueType = std::decay_t<ValueT>;
  35. // Typically we want to use `ValueType&` and `const ValueType& to avoid
  36. // copies, but when the value type is a `StringRef`, we assume external
  37. // storage for the string data and both our value type and ref type will be
  38. // `StringRef`. This will preclude mutation of the string data.
  39. using RefType = std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
  40. llvm::StringRef, ValueType&>;
  41. using ConstRefType =
  42. std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
  43. llvm::StringRef, const ValueType&>;
  44. };
  45. // A simple wrapper for accumulating values, providing IDs to later retrieve the
  46. // value. This does not do deduplication.
  47. //
  48. // IdT::ValueType must represent the type being indexed.
  49. template <typename IdT>
  50. requires(Internal::IdHasValueType<IdT>)
  51. class ValueStore
  52. : public std::conditional<
  53. std::is_base_of_v<Printable<typename IdT::ValueType>,
  54. typename IdT::ValueType>,
  55. Yaml::Printable<ValueStore<IdT>>, Internal::ValueStoreNotPrintable> {
  56. public:
  57. using ValueType = ValueStoreTypes<IdT>::ValueType;
  58. using RefType = ValueStoreTypes<IdT>::RefType;
  59. using ConstRefType = ValueStoreTypes<IdT>::ConstRefType;
  60. ValueStore() = default;
  61. // Stores the value and returns an ID to reference it.
  62. auto Add(ValueType value) -> IdT {
  63. // This routine is especially hot and the check here relatively expensive
  64. // for the value provided, so only do this in non-optimized builds to make
  65. // tracking down issues easier.
  66. CARBON_DCHECK(size_ < std::numeric_limits<int32_t>::max(), "Id overflow");
  67. IdT id(size_);
  68. auto [chunk_index, pos] = Internal::IdToChunkIndices(id);
  69. ++size_;
  70. CARBON_DCHECK(static_cast<size_t>(chunk_index) <= chunks_.size(),
  71. "{0} <= {1}", chunk_index, chunks_.size());
  72. if (static_cast<size_t>(chunk_index) == chunks_.size()) {
  73. chunks_.emplace_back();
  74. }
  75. CARBON_DCHECK(pos == chunks_[chunk_index].size());
  76. chunks_[chunk_index].push(std::move(value));
  77. return id;
  78. }
  79. // Returns a mutable value for an ID.
  80. auto Get(IdT id) -> RefType {
  81. CARBON_DCHECK(id.index >= 0, "{0}", id);
  82. CARBON_DCHECK(id.index < size_, "{0}", id);
  83. auto [chunk_index, pos] = Internal::IdToChunkIndices(id);
  84. return chunks_[chunk_index].at(pos);
  85. }
  86. // Returns the value for an ID.
  87. auto Get(IdT id) const -> ConstRefType {
  88. CARBON_DCHECK(id.index >= 0, "{0}", id);
  89. CARBON_DCHECK(id.index < size_, "{0}", id);
  90. auto [chunk_index, pos] = Internal::IdToChunkIndices(id);
  91. return chunks_[chunk_index].at(pos);
  92. }
  93. // Reserves space.
  94. auto Reserve(size_t size) -> void {
  95. // We get the number of chunks needed to satisfy `size` by rounding any
  96. // partial result up.
  97. size_t num_more_chunks =
  98. (size + ChunkType::Capacity - 1) / ChunkType::Capacity;
  99. if (chunks_.size() < num_more_chunks) {
  100. // We resize() rather than reserve() here to create the new `ChunkType`
  101. // objects, which will in turn allocate space for values in those chunks
  102. // (but not initialize them).
  103. chunks_.resize(num_more_chunks);
  104. }
  105. }
  106. // These are to support printable structures, and are not guaranteed.
  107. auto OutputYaml() const -> Yaml::OutputMapping {
  108. return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
  109. for (auto [id, value] : enumerate()) {
  110. map.Add(PrintToString(id), Yaml::OutputScalar(value));
  111. }
  112. });
  113. }
  114. // Collects memory usage of the values.
  115. auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
  116. -> void {
  117. mem_usage.Add(label.str(), size_ * sizeof(ValueType),
  118. ChunkType::CapacityBytes * chunks_.size());
  119. }
  120. auto size() const -> size_t { return size_; }
  121. // Makes an iterable range over references to all values in the ValueStore.
  122. auto values() const [[clang::lifetimebound]] -> ValueStoreRange<IdT> {
  123. return ValueStoreRange<IdT>(*this);
  124. }
  125. // Makes an iterable range over pairs of the index and a reference to the
  126. // value for each value in the store.
  127. //
  128. // The range is over references to the values in the store, even if used with
  129. // `auto` to destructure the pair. In this example, the `value` is a
  130. // `ConstRefType`:
  131. // ```
  132. // for (auto [id, value] : store.enumerate()) { ... }
  133. // ```
  134. auto enumerate() const [[clang::lifetimebound]] -> auto {
  135. auto index_to_id = [&](int32_t i) -> std::pair<IdT, ConstRefType> {
  136. return std::pair<IdT, ConstRefType>(IdT(i), Get(IdT(i)));
  137. };
  138. // Because indices into `ValueStore` are all sequential values from 0, we
  139. // can use llvm::seq to walk all indices in the store.
  140. return llvm::map_range(llvm::seq(size_), index_to_id);
  141. }
  142. private:
  143. friend class ValueStoreRange<IdT>;
  144. using ChunkType = Internal::ValueStoreChunk<IdT, ValueType>;
  145. // Number of elements added to the store. The number should never exceed what
  146. // fits in an `int32_t`, which is checked in non-optimized builds in Add().
  147. int32_t size_ = 0;
  148. // Storage for the `ValueType` objects, indexed by the id. We use a vector of
  149. // chunks of `ValueType` instead of just a vector of `ValueType` so that
  150. // addresses of `ValueType` objects are stable. This allows the rest of the
  151. // toolchain to hold references into `ValueStore` without having to worry
  152. // about invalidation and use-after-free. We ensure at least one Chunk is held
  153. // inline so that in the case where there is only a single Chunk (i.e. small
  154. // files) we can avoid one indirection.
  155. llvm::SmallVector<ChunkType, 1> chunks_;
  156. };
  157. // A range over references to the values in a ValueStore, returned from
  158. // `ValueStore::values()`. Hides the complex type name of the iterator
  159. // internally to provide a type name (`ValueStoreRange<IdT>`) that can be
  160. // referred to without auto and templates.
  161. template <class IdT>
  162. class ValueStoreRange {
  163. public:
  164. explicit ValueStoreRange(const ValueStore<IdT>& store
  165. [[clang::lifetimebound]])
  166. : flattened_range_(MakeFlattenedRange(store)) {}
  167. auto begin() const -> auto { return flattened_range_.begin(); }
  168. auto end() const -> auto { return flattened_range_.end(); }
  169. private:
  170. // Flattens the range of `ValueStoreChunk`s of `ValueType`s into a single
  171. // range of `ValueType`s.
  172. static auto MakeFlattenedRange(const ValueStore<IdT>& store) -> auto {
  173. // Because indices into `ValueStore` are all sequential values from 0, we
  174. // can use llvm::seq to walk all indices in the store.
  175. return llvm::map_range(llvm::seq(store.size_),
  176. [&](int32_t i) -> ValueStore<IdT>::ConstRefType {
  177. return store.Get(IdT(i));
  178. });
  179. }
  180. using FlattenedRangeType =
  181. decltype(MakeFlattenedRange(std::declval<const ValueStore<IdT>&>()));
  182. FlattenedRangeType flattened_range_;
  183. };
  184. // A wrapper for accumulating immutable values with deduplication, providing IDs
  185. // to later retrieve the value.
  186. //
  187. // IdT::ValueType must represent the type being indexed.
  188. template <typename IdT>
  189. class CanonicalValueStore {
  190. public:
  191. using ValueType = ValueStoreTypes<IdT>::ValueType;
  192. using RefType = ValueStoreTypes<IdT>::RefType;
  193. using ConstRefType = ValueStoreTypes<IdT>::ConstRefType;
  194. // Stores a canonical copy of the value and returns an ID to reference it.
  195. auto Add(ValueType value) -> IdT;
  196. // Returns the value for an ID.
  197. auto Get(IdT id) const -> ConstRefType { return values_.Get(id); }
  198. // Looks up the canonical ID for a value, or returns `None` if not in the
  199. // store.
  200. auto Lookup(ValueType value) const -> IdT;
  201. // Reserves space.
  202. auto Reserve(size_t size) -> void;
  203. // These are to support printable structures, and are not guaranteed.
  204. auto OutputYaml() const -> Yaml::OutputMapping {
  205. return values_.OutputYaml();
  206. }
  207. auto values() const [[clang::lifetimebound]] -> ValueStoreRange<IdT> {
  208. return values_.values();
  209. }
  210. auto size() const -> size_t { return values_.size(); }
  211. // Collects memory usage of the values and deduplication set.
  212. auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
  213. -> void {
  214. mem_usage.Collect(MemUsage::ConcatLabel(label, "values_"), values_);
  215. auto bytes = set_.ComputeMetrics(KeyContext(&values_)).storage_bytes;
  216. mem_usage.Add(MemUsage::ConcatLabel(label, "set_"), bytes, bytes);
  217. }
  218. private:
  219. class KeyContext;
  220. ValueStore<IdT> values_;
  221. Set<IdT, /*SmallSize=*/0, KeyContext> set_;
  222. };
  223. template <typename IdT>
  224. class CanonicalValueStore<IdT>::KeyContext
  225. : public TranslatingKeyContext<KeyContext> {
  226. public:
  227. explicit KeyContext(const ValueStore<IdT>* values) : values_(values) {}
  228. // Note that it is safe to return a `const` reference here as the underlying
  229. // object's lifetime is provided by the `ValueStore`.
  230. auto TranslateKey(IdT id) const -> ValueStore<IdT>::ConstRefType {
  231. return values_->Get(id);
  232. }
  233. private:
  234. const ValueStore<IdT>* values_;
  235. };
  236. template <typename IdT>
  237. auto CanonicalValueStore<IdT>::Add(ValueType value) -> IdT {
  238. auto make_key = [&] { return IdT(values_.Add(std::move(value))); };
  239. return set_.Insert(value, make_key, KeyContext(&values_)).key();
  240. }
  241. template <typename IdT>
  242. auto CanonicalValueStore<IdT>::Lookup(ValueType value) const -> IdT {
  243. if (auto result = set_.Lookup(value, KeyContext(&values_))) {
  244. return result.key();
  245. }
  246. return IdT::None;
  247. }
  248. template <typename IdT>
  249. auto CanonicalValueStore<IdT>::Reserve(size_t size) -> void {
  250. // Compute the resulting new insert count using the size of values -- the
  251. // set doesn't have a fast to compute current size.
  252. if (size > values_.size()) {
  253. set_.GrowForInsertCount(size - values_.size(), KeyContext(&values_));
  254. }
  255. values_.Reserve(size);
  256. }
  257. // A ValueStore that builds a 1:1 relationship between two IDs.
  258. // * `RelatedIdT` represents a related ID that can be used to find values in the
  259. // store.
  260. // * `IdT` is the actual ID of values in this store, and `IdT::ValueType` is the
  261. // value type being stored.
  262. //
  263. // The value store builds a mapping so that either ID can be used later to find
  264. // a value. And the user can query if a related `RelatedIdT` has been used to
  265. // add a value to the store or not.
  266. //
  267. // When adding to the store, the user provides the related `RelatedIdT` along
  268. // with the value being stored, and gets back the ID of the value in the store.
  269. //
  270. // This store requires more storage space than normal ValueStore does, as it
  271. // requires storing a bit for presence of each `RelatedIdT`. And it allocates
  272. // memory for values for all IDs up largest ID present in the store, even if
  273. // they are not yet used.
  274. template <typename RelatedIdT, typename IdT>
  275. class RelationalValueStore {
  276. public:
  277. using ValueType = ValueStoreTypes<IdT>::ValueType;
  278. using ConstRefType = ValueStoreTypes<IdT>::ConstRefType;
  279. // Given the related ID and a value, stores the value and returns a mapped ID
  280. // to reference it in the store.
  281. auto Add(RelatedIdT related_id, ValueType value) -> IdT {
  282. CARBON_DCHECK(related_id.index >= 0, "{0}", related_id);
  283. IdT id(related_id.index);
  284. if (static_cast<size_t>(id.index) >= values_.size()) {
  285. values_.resize(id.index + 1);
  286. }
  287. auto& opt = values_[id.index];
  288. CARBON_CHECK(!opt.has_value(),
  289. "Add with `related_id` that was already added to the store");
  290. opt.emplace(std::move(value));
  291. return id;
  292. }
  293. // Returns the ID of a value in the store if the `related_id` was previously
  294. // used to add a value to the store, or None.
  295. auto TryGetId(RelatedIdT related_id) const -> IdT {
  296. CARBON_DCHECK(related_id.index >= 0, "{0}", related_id);
  297. if (static_cast<size_t>(related_id.index) >= values_.size()) {
  298. return IdT::None;
  299. }
  300. auto& opt = values_[related_id.index];
  301. if (!opt.has_value()) {
  302. return IdT::None;
  303. }
  304. return IdT(related_id.index);
  305. }
  306. // Returns a value for an ID.
  307. auto Get(IdT id) const -> ConstRefType {
  308. CARBON_DCHECK(id.index >= 0, "{0}", id);
  309. return *values_[id.index];
  310. }
  311. private:
  312. // Set inline size to 0 because these will typically be too large for the
  313. // stack, while this does make File smaller.
  314. llvm::SmallVector<std::optional<std::decay_t<ValueType>>, 0> values_;
  315. };
  316. } // namespace Carbon
  317. #endif // CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_