value_store.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  5. #define CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  6. #include <bit>
  7. #include <cstddef>
  8. #include <limits>
  9. #include <type_traits>
  10. #include <utility>
  11. #include "common/check.h"
  12. #include "common/ostream.h"
  13. #include "llvm/ADT/STLExtras.h"
  14. #include "llvm/ADT/Sequence.h"
  15. #include "llvm/ADT/SmallVector.h"
  16. #include "llvm/Support/MemAlloc.h"
  17. #include "toolchain/base/mem_usage.h"
  18. #include "toolchain/base/value_store_types.h"
  19. #include "toolchain/base/yaml.h"
  20. namespace Carbon {
  21. namespace Internal {
// Empty marker class used as the parent of `ValueStore` instantiations whose
// value type is not printable; printable value types get `Yaml::Printable` as
// the parent instead, selected via `std::conditional`. This is an
// implementation detail only, not an API.
class ValueStoreNotPrintable {};
  25. } // namespace Internal
  26. // A simple wrapper for accumulating values, providing IDs to later retrieve the
  27. // value. This does not do deduplication.
  28. template <typename IdT, typename ValueT>
  29. class ValueStore
  30. : public std::conditional<std::is_base_of_v<Printable<ValueT>, ValueT>,
  31. Yaml::Printable<ValueStore<IdT, ValueT>>,
  32. Internal::ValueStoreNotPrintable> {
  33. public:
  34. using IdType = IdT;
  35. using ValueType = ValueStoreTypes<IdT, ValueT>::ValueType;
  36. using RefType = ValueStoreTypes<IdT, ValueT>::RefType;
  37. using ConstRefType = ValueStoreTypes<IdT, ValueT>::ConstRefType;
  38. // A range over references to the values in a ValueStore, returned from
  39. // `ValueStore::values()`. Hides the complex type name of the iterator
  40. // internally to provide a type name (`Range`) that can be
  41. // referred to without auto and templates.
  42. class Range {
  43. public:
  44. explicit Range(const ValueStore& store [[clang::lifetimebound]])
  45. : flattened_range_(MakeFlattenedRange(store)) {}
  46. auto begin() const -> auto { return flattened_range_.begin(); }
  47. auto end() const -> auto { return flattened_range_.end(); }
  48. private:
  49. // Flattens the range of `Chunk`s of `ValueType`s into a single
  50. // range of `ValueType`s.
  51. static auto MakeFlattenedRange(const ValueStore& store) -> auto {
  52. // Because indices into `ValueStore` are all sequential values from 0, we
  53. // can use llvm::seq to walk all indices in the store.
  54. return llvm::map_range(
  55. llvm::seq(store.size_),
  56. [&](int32_t i) -> ConstRefType { return store.Get(IdType(i)); });
  57. }
  58. using FlattenedRangeType =
  59. decltype(MakeFlattenedRange(std::declval<const ValueStore&>()));
  60. FlattenedRangeType flattened_range_;
  61. };
  62. ValueStore() = default;
  63. // Stores the value and returns an ID to reference it.
  64. auto Add(ValueType value) -> IdType {
  65. // This routine is especially hot and the check here relatively expensive
  66. // for the value provided, so only do this in non-optimized builds to make
  67. // tracking down issues easier.
  68. CARBON_DCHECK(size_ < std::numeric_limits<int32_t>::max(), "Id overflow");
  69. IdType id(size_);
  70. auto [chunk_index, pos] = IdToChunkIndices(id);
  71. ++size_;
  72. CARBON_DCHECK(static_cast<size_t>(chunk_index) <= chunks_.size(),
  73. "{0} <= {1}", chunk_index, chunks_.size());
  74. if (static_cast<size_t>(chunk_index) == chunks_.size()) {
  75. chunks_.emplace_back();
  76. }
  77. CARBON_DCHECK(pos == chunks_[chunk_index].size());
  78. chunks_[chunk_index].push(std::move(value));
  79. return id;
  80. }
  81. // Returns a mutable value for an ID.
  82. auto Get(IdType id) -> RefType {
  83. CARBON_DCHECK(id.index >= 0, "{0}", id);
  84. CARBON_DCHECK(id.index < size_, "{0}", id);
  85. auto [chunk_index, pos] = IdToChunkIndices(id);
  86. return chunks_[chunk_index].at(pos);
  87. }
  88. // Returns the value for an ID.
  89. auto Get(IdType id) const -> ConstRefType {
  90. CARBON_DCHECK(id.index >= 0, "{0}", id);
  91. CARBON_DCHECK(id.index < size_, "{0}", id);
  92. auto [chunk_index, pos] = IdToChunkIndices(id);
  93. return chunks_[chunk_index].at(pos);
  94. }
  95. // Reserves space.
  96. auto Reserve(size_t size) -> void {
  97. // We get the number of chunks needed to satisfy `size` by rounding any
  98. // partial result up.
  99. size_t num_more_chunks = (size + Chunk::Capacity() - 1) / Chunk::Capacity();
  100. if (chunks_.size() < num_more_chunks) {
  101. // We resize() rather than reserve() here to create the new `ChunkType`
  102. // objects, which will in turn allocate space for values in those chunks
  103. // (but not initialize them).
  104. chunks_.resize(num_more_chunks);
  105. }
  106. }
  107. // These are to support printable structures, and are not guaranteed.
  108. auto OutputYaml() const -> Yaml::OutputMapping {
  109. return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
  110. for (auto [id, value] : enumerate()) {
  111. map.Add(PrintToString(id), Yaml::OutputScalar(value));
  112. }
  113. });
  114. }
  115. // Collects memory usage of the values.
  116. auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
  117. -> void {
  118. mem_usage.Add(label.str(), size_ * sizeof(ValueType),
  119. Chunk::CapacityBytes * chunks_.size());
  120. }
  121. auto size() const -> size_t { return size_; }
  122. // Makes an iterable range over references to all values in the ValueStore.
  123. auto values() const [[clang::lifetimebound]] -> Range { return Range(*this); }
  124. // Makes an iterable range over pairs of the index and a reference to the
  125. // value for each value in the store.
  126. //
  127. // The range is over references to the values in the store, even if used with
  128. // `auto` to destructure the pair. In this example, the `value` is a
  129. // `ConstRefType`:
  130. // ```
  131. // for (auto [id, value] : store.enumerate()) { ... }
  132. // ```
  133. auto enumerate() const [[clang::lifetimebound]] -> auto {
  134. // For `it->val`, writing `const std::pair` is required; otherwise
  135. // `mapped_iterator` incorrectly infers the pointer type for `PointerProxy`.
  136. // NOLINTNEXTLINE(readability-const-return-type)
  137. auto index_to_id = [&](int32_t i) -> const std::pair<IdType, ConstRefType> {
  138. return std::pair<IdType, ConstRefType>(IdType(i), Get(IdType(i)));
  139. };
  140. // Because indices into `ValueStore` are all sequential values from 0, we
  141. // can use llvm::seq to walk all indices in the store.
  142. return llvm::map_range(llvm::seq(size_), index_to_id);
  143. }
  144. private:
  145. // A chunk of `ValueType`s which has a fixed capacity, but variable size.
  146. // Tracks the size internally for verifying bounds.
  147. struct Chunk {
  148. public:
  149. // The max size of each chunk allocation for `ValueStore`. This is based on
  150. // TLB page sizes for the target platform.
  151. //
  152. // See https://docs.kernel.org/admin-guide/mm/hugetlbpage.html
  153. //
  154. // A 4K chunk size outperforms a 1M chunk size on Linux-x64 and MacOS-arm64
  155. // in benchmarks and when running file_test.
  156. //
  157. // Linux-x64: x64 CPUs support 4K and 2M page sizes, but we see 1M is slower
  158. // than 4K with tcmalloc in opt builds for our tests.
  159. //
  160. // Mac-arm64: arm64 CPUs support 4K, 8K, 64K, 256K, 1M, 4M and up. Like for
  161. // Linux-x64, 4K outperformed 1M. We didn't try other sizes yet.
  162. //
  163. // TODO: Is there a more optimize size for Mac-arm64? What should
  164. // Linux-arm64 and Mac-x64 use? What should Windows use?
  165. //
  166. // TODO: The previous SmallVector<ValueType> seems to outperform 4K chunks
  167. // (they may be slower by up to 5%) in benchmarks. Find ways to make
  168. // chunking faster. Should successive chunks get larger in size? That will
  169. // greatly complicate math for choosing a chunk though.
  170. static constexpr auto MaxAllocationBytes() -> int32_t {
  171. #if !defined(NDEBUG) || LLVM_ADDRESS_SANITIZER_BUILD
  172. // Use a small size in unoptimized builds to ensure multiple chunks get
  173. // used. And do the same in ASAN builds to reduce bookkeeping overheads.
  174. // Using large allocations (e.g. 1M+) incurs a 10x runtime cost for our
  175. // tests under ASAN.
  176. return sizeof(ValueType) * 5;
  177. #else
  178. return 4 * 1024;
  179. #endif
  180. }
  181. // The number of elements stored in each chunk allocation.
  182. //
  183. // The number must be a power of two so that that there are no unused values
  184. // in bits indexing into the allocation.
  185. static constexpr auto Capacity() -> int32_t {
  186. constexpr auto MaxElements = MaxAllocationBytes() / sizeof(ValueType);
  187. return std::bit_floor(MaxElements);
  188. }
  189. // The number of bits needed to index each element in a chunk allocation.
  190. static constexpr auto IndexBits() -> int32_t {
  191. static_assert(Capacity() > 0);
  192. return std::bit_width(uint32_t{Capacity() - 1});
  193. }
  194. static constexpr auto CapacityBytes = Capacity() * sizeof(ValueType);
  195. explicit Chunk()
  196. : buf_(reinterpret_cast<ValueType*>(
  197. llvm::allocate_buffer(CapacityBytes, alignof(ValueType)))) {}
  198. // Moving leaves nullptr behind in the moved-from object so that the
  199. // destructor is a no-op (preventing double free).
  200. Chunk(Chunk&& rhs) noexcept
  201. : buf_(std::exchange(rhs.buf_, nullptr)), num_(rhs.num_) {}
  202. auto operator=(Chunk&& rhs) noexcept -> Chunk& {
  203. buf_ = std::exchange(rhs.buf_, nullptr);
  204. num_ = rhs.num_;
  205. return *this;
  206. }
  207. ~Chunk() {
  208. if (buf_) {
  209. if constexpr (!std::is_trivially_destructible_v<ValueType>) {
  210. std::destroy_n(buf_, num_);
  211. }
  212. llvm::deallocate_buffer(buf_, CapacityBytes, alignof(ValueType));
  213. }
  214. }
  215. auto at(int32_t i) -> ValueType& {
  216. CARBON_CHECK(i < num_, "{0}", i);
  217. return buf_[i];
  218. }
  219. auto at(int32_t i) const -> const ValueType& {
  220. CARBON_CHECK(i < num_, "{0}", i);
  221. return buf_[i];
  222. }
  223. auto push(ValueType&& value) -> void {
  224. CARBON_CHECK(num_ < Capacity());
  225. std::construct_at(buf_ + num_, std::move(value));
  226. ++num_;
  227. }
  228. auto size() const -> int32_t { return num_; }
  229. private:
  230. // Verify using an `int32_t` for `num_` is sound.
  231. static_assert(Capacity() <= std::numeric_limits<int32_t>::max());
  232. ValueType* buf_;
  233. int32_t num_ = 0;
  234. };
  235. // Converts an id into an index into the set of chunks, and an offset into
  236. // that specific chunk. Looks for index overflow in non-optimized builds.
  237. static auto IdToChunkIndices(IdType id) -> std::pair<int32_t, int32_t> {
  238. constexpr auto LowBits = Chunk::IndexBits();
  239. // Verify there are no unused bits when indexing up to the `Capacity`. This
  240. // ensures that ids are contiguous values from 0, as if the values were all
  241. // stored in a single array, and allows using the ids to index into other
  242. // arrays.
  243. static_assert((1 << LowBits) == Chunk::Capacity());
  244. // Simple check to make sure nothing went wildly wrong with the `Capacity`,
  245. // and we have some room for a chunk index, and that shifting by the number
  246. // of bits won't be UB in an int32_t.
  247. static_assert(LowBits < 30);
  248. // The index of the chunk is the high bits.
  249. auto chunk = id.index >> LowBits;
  250. // The index into the chunk is the low bits.
  251. auto pos = id.index & ((1 << LowBits) - 1);
  252. return {chunk, pos};
  253. }
  254. // Number of elements added to the store. The number should never exceed what
  255. // fits in an `int32_t`, which is checked in non-optimized builds in Add().
  256. int32_t size_ = 0;
  257. // Storage for the `ValueType` objects, indexed by the id. We use a vector of
  258. // chunks of `ValueType` instead of just a vector of `ValueType` so that
  259. // addresses of `ValueType` objects are stable. This allows the rest of the
  260. // toolchain to hold references into `ValueStore` without having to worry
  261. // about invalidation and use-after-free. We ensure at least one Chunk is held
  262. // inline so that in the case where there is only a single Chunk (i.e. small
  263. // files) we can avoid one indirection.
  264. llvm::SmallVector<Chunk, 1> chunks_;
  265. };
  266. } // namespace Carbon
  267. #endif // CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_