// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
#define CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_

#include <bit>
#include <cstddef>
#include <limits>
#include <memory>
#include <type_traits>
#include <utility>

#include "common/check.h"
#include "common/ostream.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MemAlloc.h"
#include "toolchain/base/mem_usage.h"
#include "toolchain/base/value_store_types.h"
#include "toolchain/base/yaml.h"

namespace Carbon {

namespace Internal {

// Used as a parent class for non-printable types. This is just for
// `std::conditional`, not as an API.
class ValueStoreNotPrintable {};

}  // namespace Internal

// A simple wrapper for accumulating values, providing IDs to later retrieve
// the value. This does not do deduplication.
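//
// A minimal usage sketch, assuming a hypothetical id type `MyId` that (as
// `Get` and `IdToChunkIndices` below require) is constructible from an
// `int32_t` and exposes a public `index` field:
// ```
// ValueStore<MyId, std::string> store;
// MyId id = store.Add("hello");
// const auto& value = store.Get(id);
// ```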
template <typename IdT, typename ValueT>
class ValueStore
    : public std::conditional_t<std::is_base_of_v<Printable<ValueT>, ValueT>,
                                Yaml::Printable<ValueStore<IdT, ValueT>>,
                                Internal::ValueStoreNotPrintable> {
 public:
  using IdType = IdT;
  using ValueType = ValueStoreTypes<ValueT>::ValueType;
  using RefType = ValueStoreTypes<ValueT>::RefType;
  using ConstRefType = ValueStoreTypes<ValueT>::ConstRefType;

  // A range over references to the values in a ValueStore, returned from
  // `ValueStore::values()`. Hides the complex type name of the iterator
  // internally to provide a type name (`Range`) that can be referred to
  // without `auto` and templates.
  class Range {
   public:
    explicit Range(const ValueStore& store [[clang::lifetimebound]])
        : flattened_range_(MakeFlattenedRange(store)) {}

    auto begin() const -> auto { return flattened_range_.begin(); }
    auto end() const -> auto { return flattened_range_.end(); }

   private:
    // Flattens the range of `Chunk`s of `ValueType`s into a single range of
    // `ValueType`s.
    static auto MakeFlattenedRange(const ValueStore& store) -> auto {
      // Because indices into `ValueStore` are all sequential values from 0, we
      // can use llvm::seq to walk all indices in the store.
      return llvm::map_range(
          llvm::seq(store.size_),
          [&](int32_t i) -> ConstRefType { return store.Get(IdType(i)); });
    }

    using FlattenedRangeType =
        decltype(MakeFlattenedRange(std::declval<const ValueStore&>()));

    FlattenedRangeType flattened_range_;
  };

  ValueStore() = default;

  // Stores the value and returns an ID to reference it.
  auto Add(ValueType value) -> IdType {
    // This routine is especially hot, and this check is relatively expensive,
    // so only perform it in non-optimized builds to make tracking down issues
    // easier.
    CARBON_DCHECK(size_ < std::numeric_limits<int32_t>::max(), "Id overflow");
    IdType id(size_);
    auto [chunk_index, pos] = IdToChunkIndices(id);
    ++size_;
    CARBON_DCHECK(static_cast<size_t>(chunk_index) <= chunks_.size(),
                  "{0} <= {1}", chunk_index, chunks_.size());
    if (static_cast<size_t>(chunk_index) == chunks_.size()) {
      chunks_.emplace_back();
    }
    CARBON_DCHECK(pos == chunks_[chunk_index].size());
    chunks_[chunk_index].Add(std::move(value));
    return id;
  }

  // Returns a mutable value for an ID.
  auto Get(IdType id) -> RefType {
    CARBON_DCHECK(id.index >= 0, "{0}", id);
    CARBON_DCHECK(id.index < size_, "{0}", id);
    auto [chunk_index, pos] = IdToChunkIndices(id);
    return chunks_[chunk_index].Get(pos);
  }

  // Returns the value for an ID.
  auto Get(IdType id) const -> ConstRefType {
    CARBON_DCHECK(id.index >= 0, "{0}", id);
    CARBON_DCHECK(id.index < size_, "{0}", id);
    auto [chunk_index, pos] = IdToChunkIndices(id);
    return chunks_[chunk_index].Get(pos);
  }

  // Reserves chunk storage for values up to `size`. Does not change the size
  // of the store.
  auto Reserve(int32_t size) -> void {
    if (size <= size_) {
      return;
    }
    auto [final_chunk_index, _] = IdToChunkIndices(IdType(size - 1));
    chunks_.resize(final_chunk_index + 1);
  }

  // Grows the ValueStore to `size`. Fills new entries with `default_value`.
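  //
  // A worked example (assuming, for illustration, that `Chunk::Capacity()` is
  // 4): with `size_` currently 2, `Resize(10, v)` fills the last 2 slots of
  // chunk 0, all 4 slots of chunk 1, and the first 2 slots of chunk 2.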
  auto Resize(int32_t size, ConstRefType default_value) -> void {
    if (size <= size_) {
      return;
    }
    auto [begin_chunk_index, begin_pos] = IdToChunkIndices(IdType(size_));
    // Use an inclusive end index so that when `size` falls exactly on a chunk
    // boundary, we don't touch the chunk past the last one we need.
    auto [end_chunk_index, end_pos] = IdToChunkIndices(IdType(size - 1));
    chunks_.resize(end_chunk_index + 1);
    // If the begin and end chunks are the same, we only fill from begin to
    // end.
    if (begin_chunk_index == end_chunk_index) {
      chunks_[begin_chunk_index].UninitializedFill(end_pos - begin_pos + 1,
                                                   default_value);
    } else {
      // Otherwise, we do partial fills on the begin and end chunks, and full
      // fills on intermediate chunks.
      chunks_[begin_chunk_index].UninitializedFill(
          Chunk::Capacity() - begin_pos, default_value);
      for (auto i = begin_chunk_index + 1; i < end_chunk_index; ++i) {
        chunks_[i].UninitializedFill(Chunk::Capacity(), default_value);
      }
      chunks_[end_chunk_index].UninitializedFill(end_pos + 1, default_value);
    }
    // Update size.
    size_ = size;
  }

  // This supports printable structures; the output format is not guaranteed.
  auto OutputYaml() const -> Yaml::OutputMapping {
    return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
      for (auto [id, value] : enumerate()) {
        map.Add(PrintToString(id), Yaml::OutputScalar(value));
      }
    });
  }

  // Collects memory usage of the values.
  auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
      -> void {
    mem_usage.Add(label.str(), size_ * sizeof(ValueType),
                  Chunk::CapacityBytes * chunks_.size());
  }

  auto size() const -> size_t { return size_; }

  // Makes an iterable range over references to all values in the ValueStore.
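  //
  // In this example, `value` is a `RefType` (`ConstRefType` from the `const`
  // overload):
  // ```
  // for (auto&& value : store.values()) { ... }
  // ```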
  auto values() [[clang::lifetimebound]] -> auto {
    return llvm::map_range(
        llvm::seq(size_),
        [&](int32_t i) -> RefType { return Get(IdType(i)); });
  }
  auto values() const [[clang::lifetimebound]] -> Range { return Range(*this); }

  // Makes an iterable range over pairs of the index and a reference to the
  // value for each value in the store.
  //
  // The range is over references to the values in the store, even if used with
  // `auto` to destructure the pair. In this example, the `value` is a
  // `ConstRefType`:
  // ```
  // for (auto [id, value] : store.enumerate()) { ... }
  // ```
  auto enumerate() const [[clang::lifetimebound]] -> auto {
    // For `it->val`, writing `const std::pair` is required; otherwise
    // `mapped_iterator` incorrectly infers the pointer type for
    // `PointerProxy`.
    // NOLINTNEXTLINE(readability-const-return-type)
    auto index_to_id = [&](int32_t i) -> const std::pair<IdType, ConstRefType> {
      return std::pair<IdType, ConstRefType>(IdType(i), Get(IdType(i)));
    };
    // Because indices into `ValueStore` are all sequential values from 0, we
    // can use llvm::seq to walk all indices in the store.
    return llvm::map_range(llvm::seq(size_), index_to_id);
  }

 private:
  // A chunk of `ValueType`s which has a fixed capacity but a variable size.
  // Tracks the size internally for verifying bounds.
  struct Chunk {
   public:
    // The max size of each chunk allocation for `ValueStore`. This is based on
    // TLB page sizes for the target platform.
    //
    // See https://docs.kernel.org/admin-guide/mm/hugetlbpage.html
    //
    // A 4K chunk size outperforms a 1M chunk size on Linux-x64 and MacOS-arm64
    // in benchmarks and when running file_test.
    //
    // Linux-x64: x64 CPUs support 4K and 2M page sizes, but we see 1M is
    // slower than 4K with tcmalloc in opt builds for our tests.
    //
    // Mac-arm64: arm64 CPUs support 4K, 8K, 64K, 256K, 1M, 4M and up. As on
    // Linux-x64, 4K outperformed 1M. We haven't tried other sizes yet.
    //
    // TODO: Is there a more optimal size for Mac-arm64? What should
    // Linux-arm64 and Mac-x64 use? What should Windows use?
    //
    // TODO: The previous SmallVector<ValueType> storage seems to outperform 4K
    // chunks (which may be slower by up to 5%) in benchmarks. Find ways to
    // make chunking faster. Should successive chunks get larger? That would
    // greatly complicate the math for choosing a chunk, though.
    static constexpr auto MaxAllocationBytes() -> int32_t {
#if !defined(NDEBUG) || LLVM_ADDRESS_SANITIZER_BUILD
      // Use a small size in unoptimized builds to ensure multiple chunks get
      // used. Do the same in ASan builds to reduce bookkeeping overhead:
      // using large allocations (e.g. 1M+) incurs a 10x runtime cost for our
      // tests under ASan.
      return sizeof(ValueType) * 5;
#else
      return 4 * 1024;
#endif
    }

    // The number of elements stored in each chunk allocation.
    //
    // The number must be a power of two so that there are no unused values in
    // the bits indexing into the allocation.
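    //
    // A worked example: in an optimized build (4096-byte allocations) with a
    // 12-byte `ValueType`, `MaxElements` is 341 and `Capacity()` is
    // `bit_floor(341)`, i.e. 256.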
    static constexpr auto Capacity() -> int32_t {
      constexpr auto MaxElements = MaxAllocationBytes() / sizeof(ValueType);
      return std::bit_floor(MaxElements);
    }

    // The number of bits needed to index each element in a chunk allocation.
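    // For example, with `Capacity() == 256` this is `bit_width(255u)`, i.e. 8.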
    static constexpr auto IndexBits() -> int32_t {
      static_assert(Capacity() > 0);
      return std::bit_width(uint32_t{Capacity() - 1});
    }

    static constexpr auto CapacityBytes = Capacity() * sizeof(ValueType);

    explicit Chunk()
        : buf_(reinterpret_cast<ValueType*>(
              llvm::allocate_buffer(CapacityBytes, alignof(ValueType)))) {}

    // Moving leaves nullptr behind in the moved-from object so that its
    // destructor is a no-op (preventing a double free).
    Chunk(Chunk&& rhs) noexcept
        : buf_(std::exchange(rhs.buf_, nullptr)), num_(rhs.num_) {}
    auto operator=(Chunk&& rhs) noexcept -> Chunk& {
      buf_ = std::exchange(rhs.buf_, nullptr);
      num_ = rhs.num_;
      return *this;
    }

    ~Chunk() {
      if (buf_) {
        if constexpr (!std::is_trivially_destructible_v<ValueType>) {
          std::destroy_n(buf_, num_);
        }
        llvm::deallocate_buffer(buf_, CapacityBytes, alignof(ValueType));
      }
    }

    auto Get(int32_t i) -> ValueType& {
      CARBON_DCHECK(i < num_, "{0}", i);
      return buf_[i];
    }
    auto Get(int32_t i) const -> const ValueType& {
      CARBON_DCHECK(i < num_, "{0}", i);
      return buf_[i];
    }

    auto Add(ValueType&& value) -> void {
      CARBON_DCHECK(num_ < Capacity());
      std::construct_at(buf_ + num_, std::move(value));
      ++num_;
    }

    // Fills `fill_count` entries with `default_value`, increasing the size
    // accordingly.
    auto UninitializedFill(int32_t fill_count, ConstRefType default_value)
        -> void {
      CARBON_DCHECK(num_ + fill_count <= Capacity());
      std::uninitialized_fill_n(buf_ + num_, fill_count, default_value);
      num_ += fill_count;
    }

    auto size() const -> int32_t { return num_; }

   private:
    // Verify that using an `int32_t` for `num_` is sound.
    static_assert(Capacity() <= std::numeric_limits<int32_t>::max());

    ValueType* buf_;
    int32_t num_ = 0;
  };

  // Converts an id into an index into the set of chunks, and an offset into
  // that specific chunk. Checks for index overflow in non-optimized builds.
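  //
  // For example, with a `Chunk::Capacity()` of 1024 (so `LowBits` is 10), an
  // id with index 2050 maps to chunk 2 (`2050 >> 10`) at position 2
  // (`2050 & 1023`).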
  static auto IdToChunkIndices(IdType id) -> std::pair<int32_t, int32_t> {
    constexpr auto LowBits = Chunk::IndexBits();
    // Verify there are no unused bits when indexing up to the `Capacity`. This
    // ensures that ids are contiguous values from 0, as if the values were all
    // stored in a single array, and allows using the ids to index into other
    // arrays.
    static_assert((1 << LowBits) == Chunk::Capacity());
    // A simple check that nothing went wildly wrong with the `Capacity`, that
    // there is room left over for a chunk index, and that shifting by
    // `LowBits` won't be UB on an `int32_t`.
    static_assert(LowBits < 30);
    // The index of the chunk is the high bits.
    auto chunk = id.index >> LowBits;
    // The index into the chunk is the low bits.
    auto pos = id.index & ((1 << LowBits) - 1);
    return {chunk, pos};
  }

  // The number of elements added to the store. This should never exceed what
  // fits in an `int32_t`, which is checked in non-optimized builds in `Add()`.
  int32_t size_ = 0;

  // Storage for the `ValueType` objects, indexed by the id. We use a vector of
  // chunks of `ValueType` instead of just a vector of `ValueType` so that the
  // addresses of `ValueType` objects are stable. This allows the rest of the
  // toolchain to hold references into the `ValueStore` without having to worry
  // about invalidation and use-after-free. We ensure at least one Chunk is
  // held inline so that when there is only a single Chunk (i.e. small files)
  // we avoid one indirection.
  llvm::SmallVector<Chunk, 1> chunks_;
};

}  // namespace Carbon

#endif  // CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_