value_store.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  5. #define CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  6. #include <bit>
  7. #include <cstddef>
  8. #include <limits>
  9. #include <memory>
  10. #include <type_traits>
  11. #include <utility>
  12. #include "common/check.h"
  13. #include "common/ostream.h"
  14. #include "llvm/ADT/STLExtras.h"
  15. #include "llvm/ADT/Sequence.h"
  16. #include "llvm/ADT/SmallVector.h"
  17. #include "llvm/Support/MemAlloc.h"
  18. #include "toolchain/base/mem_usage.h"
  19. #include "toolchain/base/value_store_types.h"
  20. #include "toolchain/base/yaml.h"
  21. namespace Carbon {
  22. namespace Internal {
  23. // Used as a parent class for non-printable types. This is just for
  24. // std::conditional, not as an API.
  25. class ValueStoreNotPrintable {};
  26. } // namespace Internal
  27. struct IdTag {
  28. IdTag()
  29. : id_tag_(0),
  30. initial_reserved_ids_(std::numeric_limits<int32_t>::max()) {}
  31. explicit IdTag(int32_t id_index, int32_t initial_reserved_ids)
  32. : initial_reserved_ids_(initial_reserved_ids) {
  33. // Shift down by 1 to get out of the high bit to avoid using any negative
  34. // ids, since they have special uses.
  35. // Shift down by another 1 to free up the second highest bit for a marker to
  36. // indicate whether the index is tagged (& needs to be untagged) or not.
  37. // Add one to the index so it's not zero-based, to make it a bit less likely
  38. // this doesn't collide with anything else (though with the
  39. // second-highest-bit-tagging this might not be needed).
  40. id_tag_ = llvm::reverseBits((((id_index + 1) << 1) | 1) << 1);
  41. }
  42. auto GetCheckIRId() const -> int32_t {
  43. return (llvm::reverseBits(id_tag_) >> 2) - 1;
  44. }
  45. auto Apply(int32_t index) const -> int32_t {
  46. if (index < initial_reserved_ids_) {
  47. return index;
  48. }
  49. // assert that id_tag_ doesn't have the second highest bit set
  50. return index ^ id_tag_;
  51. }
  52. static auto DecomposeWithBestEffort(int32_t tagged_index)
  53. -> std::pair<int32_t, int32_t> {
  54. if (tagged_index < 0) {
  55. return {-1, tagged_index};
  56. }
  57. if ((llvm::reverseBits(2) & tagged_index) == 0) {
  58. return {-1, tagged_index};
  59. }
  60. int length = 0;
  61. int location = 0;
  62. for (int i = 0; i != 32; ++i) {
  63. int current_run = 0;
  64. int location_of_current_run = i;
  65. while (i != 32 && (tagged_index & (1 << i)) == 0) {
  66. ++current_run;
  67. ++i;
  68. }
  69. if (current_run != 0) {
  70. --i;
  71. }
  72. if (current_run > length) {
  73. length = current_run;
  74. location = location_of_current_run;
  75. }
  76. }
  77. if (length < 8) {
  78. return {-1, tagged_index};
  79. }
  80. auto index_mask = llvm::maskTrailingOnes<uint32_t>(location);
  81. auto ir_id = (llvm::reverseBits(tagged_index & ~index_mask) >> 2) - 1;
  82. auto index = tagged_index & index_mask;
  83. return {ir_id, index};
  84. }
  85. auto Remove(int32_t tagged_index) const -> int32_t {
  86. if ((llvm::reverseBits(2) & tagged_index) == 0) {
  87. CARBON_DCHECK(tagged_index < initial_reserved_ids_,
  88. "This untagged index is outside the initial reserved ids "
  89. "and should have been tagged.");
  90. return tagged_index;
  91. }
  92. auto index = tagged_index ^ id_tag_;
  93. CARBON_DCHECK(index >= initial_reserved_ids_,
  94. "When removing tagging bits, found an index that "
  95. "shouldn't've been tagged in the first place.");
  96. return index;
  97. }
  98. int32_t id_tag_;
  99. int32_t initial_reserved_ids_;
  100. };
  101. // A simple wrapper for accumulating values, providing IDs to later retrieve the
  102. // value. This does not do deduplication.
  103. template <typename IdT, typename ValueT>
  104. class ValueStore
  105. : public std::conditional<std::is_base_of_v<Printable<ValueT>, ValueT>,
  106. Yaml::Printable<ValueStore<IdT, ValueT>>,
  107. Internal::ValueStoreNotPrintable> {
  108. public:
  109. using IdType = IdT;
  110. using ValueType = ValueStoreTypes<ValueT>::ValueType;
  111. using RefType = ValueStoreTypes<ValueT>::RefType;
  112. using ConstRefType = ValueStoreTypes<ValueT>::ConstRefType;
  113. // A range over references to the values in a ValueStore, returned from
  114. // `ValueStore::values()`. Hides the complex type name of the iterator
  115. // internally to provide a type name (`Range`) that can be
  116. // referred to without auto and templates.
  117. class Range {
  118. public:
  119. explicit Range(const ValueStore& store [[clang::lifetimebound]])
  120. : flattened_range_(MakeFlattenedRange(store)) {}
  121. auto begin() const -> auto { return flattened_range_.begin(); }
  122. auto end() const -> auto { return flattened_range_.end(); }
  123. private:
  124. // Flattens the range of `Chunk`s of `ValueType`s into a single
  125. // range of `ValueType`s.
  126. static auto MakeFlattenedRange(const ValueStore& store) -> auto {
  127. // Because indices into `ValueStore` are all sequential values from 0, we
  128. // can use llvm::seq to walk all indices in the store.
  129. return llvm::map_range(
  130. llvm::seq(store.size_),
  131. [&](int32_t i) -> ConstRefType { return store.Get(IdType(i)); });
  132. }
  133. using FlattenedRangeType =
  134. decltype(MakeFlattenedRange(std::declval<const ValueStore&>()));
  135. FlattenedRangeType flattened_range_;
  136. };
  137. ValueStore() = default;
  138. explicit ValueStore(IdTag tag) : tag_(tag) {}
  139. // Stores the value and returns an ID to reference it.
  140. auto Add(ValueType value) -> IdType {
  141. // This routine is especially hot and the check here relatively expensive
  142. // for the value provided, so only do this in non-optimized builds to make
  143. // tracking down issues easier.
  144. CARBON_DCHECK(size_ < std::numeric_limits<int32_t>::max(), "Id overflow");
  145. IdType id(tag_.Apply(size_));
  146. auto [chunk_index, pos] = RawIndexToChunkIndices(size_);
  147. ++size_;
  148. CARBON_DCHECK(static_cast<size_t>(chunk_index) <= chunks_.size(),
  149. "{0} <= {1}", chunk_index, chunks_.size());
  150. if (static_cast<size_t>(chunk_index) == chunks_.size()) {
  151. chunks_.emplace_back();
  152. }
  153. CARBON_DCHECK(pos == chunks_[chunk_index].size());
  154. chunks_[chunk_index].Add(std::move(value));
  155. return id;
  156. }
  157. // Returns a mutable value for an ID.
  158. auto Get(IdType id) -> RefType {
  159. auto [chunk_index, pos] = IdToChunkIndices(id);
  160. return chunks_[chunk_index].Get(pos);
  161. }
  162. // Returns the value for an ID.
  163. auto Get(IdType id) const -> ConstRefType {
  164. auto [chunk_index, pos] = IdToChunkIndices(id);
  165. return chunks_[chunk_index].Get(pos);
  166. }
  167. // Reserves space.
  168. auto Reserve(int32_t size) -> void {
  169. if (size <= size_) {
  170. return;
  171. }
  172. auto [final_chunk_index, _] = RawIndexToChunkIndices(size - 1);
  173. chunks_.resize(final_chunk_index + 1);
  174. }
  175. // Grows the ValueStore to `size`. Fills entries with `default_value`.
  176. auto Resize(int32_t size, ConstRefType default_value) -> void {
  177. if (size <= size_) {
  178. return;
  179. }
  180. auto [begin_chunk_index, begin_pos] = RawIndexToChunkIndices(size_);
  181. // Use an inclusive range so that if `size` would be the next chunk, we
  182. // don't try doing something with it.
  183. auto [end_chunk_index, end_pos] = RawIndexToChunkIndices(size - 1);
  184. chunks_.resize(end_chunk_index + 1);
  185. // If the begin and end chunks are the same, we only fill from begin to end.
  186. if (begin_chunk_index == end_chunk_index) {
  187. chunks_[begin_chunk_index].UninitializedFill(end_pos - begin_pos + 1,
  188. default_value);
  189. } else {
  190. // Otherwise, we do partial fills on the begin and end chunk, and full
  191. // fills on intermediate chunks.
  192. chunks_[begin_chunk_index].UninitializedFill(
  193. Chunk::Capacity() - begin_pos, default_value);
  194. for (auto i = begin_chunk_index + 1; i < end_chunk_index; ++i) {
  195. chunks_[i].UninitializedFill(Chunk::Capacity(), default_value);
  196. }
  197. chunks_[end_chunk_index].UninitializedFill(end_pos + 1, default_value);
  198. }
  199. // Update size.
  200. size_ = size;
  201. }
  202. // These are to support printable structures, and are not guaranteed.
  203. auto OutputYaml() const -> Yaml::OutputMapping {
  204. return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
  205. for (auto [id, value] : enumerate()) {
  206. map.Add(PrintToString(id), Yaml::OutputScalar(value));
  207. }
  208. });
  209. }
  210. // Collects memory usage of the values.
  211. auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
  212. -> void {
  213. mem_usage.Add(label.str(), size_ * sizeof(ValueType),
  214. Chunk::CapacityBytes * chunks_.size());
  215. }
  216. auto size() const -> size_t { return size_; }
  217. // Makes an iterable range over references to all values in the ValueStore.
  218. auto values() [[clang::lifetimebound]] -> auto {
  219. return llvm::map_range(
  220. llvm::seq(size_), [&](int32_t i) -> RefType { return Get(IdType(i)); });
  221. }
  222. auto values() const [[clang::lifetimebound]] -> Range { return Range(*this); }
  223. // Makes an iterable range over pairs of the index and a reference to the
  224. // value for each value in the store.
  225. //
  226. // The range is over references to the values in the store, even if used with
  227. // `auto` to destructure the pair. In this example, the `value` is a
  228. // `ConstRefType`:
  229. // ```
  230. // for (auto [id, value] : store.enumerate()) { ... }
  231. // ```
  232. auto enumerate() const [[clang::lifetimebound]] -> auto {
  233. // For `it->val`, writing `const std::pair` is required; otherwise
  234. // `mapped_iterator` incorrectly infers the pointer type for `PointerProxy`.
  235. // NOLINTNEXTLINE(readability-const-return-type)
  236. auto index_to_id = [&](int32_t i) -> const std::pair<IdType, ConstRefType> {
  237. IdType id(tag_.Apply(i));
  238. return std::pair<IdType, ConstRefType>(id, Get(id));
  239. };
  240. // Because indices into `ValueStore` are all sequential values from 0, we
  241. // can use llvm::seq to walk all indices in the store.
  242. return llvm::map_range(llvm::seq(size_), index_to_id);
  243. }
  244. auto GetIdTag() const -> IdTag { return tag_; }
  245. auto GetRawIndex(IdT id) const -> int32_t {
  246. auto index = tag_.Remove(id.index);
  247. CARBON_DCHECK(index >= 0, "{0}", index);
  248. // Attempt to decompose id.index to include extra detail in the check here
  249. #ifndef NDEBUG
  250. if (index >= size_) {
  251. auto [ir_id, decomposed_index] = IdTag::DecomposeWithBestEffort(id.index);
  252. CARBON_DCHECK(
  253. index < size_,
  254. "Untagged index was outside of container range. Possibly tagged "
  255. "index {0}. Best-effort decomposition: CheckIRId: {1}, index: {2}. "
  256. "The IdTag for this container is: {3}",
  257. id.index, ir_id, decomposed_index, tag_.GetCheckIRId());
  258. }
  259. #endif
  260. return index;
  261. }
  262. private:
  263. // A chunk of `ValueType`s which has a fixed capacity, but variable size.
  264. // Tracks the size internally for verifying bounds.
  265. struct Chunk {
  266. public:
  267. // The max size of each chunk allocation for `ValueStore`. This is based on
  268. // TLB page sizes for the target platform.
  269. //
  270. // See https://docs.kernel.org/admin-guide/mm/hugetlbpage.html
  271. //
  272. // A 4K chunk size outperforms a 1M chunk size on Linux-x64 and MacOS-arm64
  273. // in benchmarks and when running file_test.
  274. //
  275. // Linux-x64: x64 CPUs support 4K and 2M page sizes, but we see 1M is slower
  276. // than 4K with tcmalloc in opt builds for our tests.
  277. //
  278. // Mac-arm64: arm64 CPUs support 4K, 8K, 64K, 256K, 1M, 4M and up. Like for
  279. // Linux-x64, 4K outperformed 1M. We didn't try other sizes yet.
  280. //
  281. // TODO: Is there a more optimize size for Mac-arm64? What should
  282. // Linux-arm64 and Mac-x64 use? What should Windows use?
  283. //
  284. // TODO: The previous SmallVector<ValueType> seems to outperform 4K chunks
  285. // (they may be slower by up to 5%) in benchmarks. Find ways to make
  286. // chunking faster. Should successive chunks get larger in size? That will
  287. // greatly complicate math for choosing a chunk though.
  288. static constexpr auto MaxAllocationBytes() -> int32_t {
  289. #if !defined(NDEBUG) || LLVM_ADDRESS_SANITIZER_BUILD
  290. // Use a small size in unoptimized builds to ensure multiple chunks get
  291. // used. And do the same in ASAN builds to reduce bookkeeping overheads.
  292. // Using large allocations (e.g. 1M+) incurs a 10x runtime cost for our
  293. // tests under ASAN.
  294. return sizeof(ValueType) * 5;
  295. #else
  296. return 4 * 1024;
  297. #endif
  298. }
  299. // The number of elements stored in each chunk allocation.
  300. //
  301. // The number must be a power of two so that that there are no unused values
  302. // in bits indexing into the allocation.
  303. static constexpr auto Capacity() -> int32_t {
  304. constexpr auto MaxElements = MaxAllocationBytes() / sizeof(ValueType);
  305. return std::bit_floor(MaxElements);
  306. }
  307. // The number of bits needed to index each element in a chunk allocation.
  308. static constexpr auto IndexBits() -> int32_t {
  309. static_assert(Capacity() > 0);
  310. return std::bit_width(uint32_t{Capacity() - 1});
  311. }
  312. static constexpr auto CapacityBytes = Capacity() * sizeof(ValueType);
  313. explicit Chunk()
  314. : buf_(reinterpret_cast<ValueType*>(
  315. llvm::allocate_buffer(CapacityBytes, alignof(ValueType)))) {}
  316. // Moving leaves nullptr behind in the moved-from object so that the
  317. // destructor is a no-op (preventing double free).
  318. Chunk(Chunk&& rhs) noexcept
  319. : buf_(std::exchange(rhs.buf_, nullptr)), num_(rhs.num_) {}
  320. auto operator=(Chunk&& rhs) noexcept -> Chunk& {
  321. buf_ = std::exchange(rhs.buf_, nullptr);
  322. num_ = rhs.num_;
  323. return *this;
  324. }
  325. ~Chunk() {
  326. if (buf_) {
  327. if constexpr (!std::is_trivially_destructible_v<ValueType>) {
  328. std::destroy_n(buf_, num_);
  329. }
  330. llvm::deallocate_buffer(buf_, CapacityBytes, alignof(ValueType));
  331. }
  332. }
  333. auto Get(int32_t i) -> ValueType& {
  334. CARBON_DCHECK(i < num_, "{0}", i);
  335. return buf_[i];
  336. }
  337. auto Get(int32_t i) const -> const ValueType& {
  338. CARBON_DCHECK(i < num_, "{0}", i);
  339. return buf_[i];
  340. }
  341. auto Add(ValueType&& value) -> void {
  342. CARBON_DCHECK(num_ < Capacity());
  343. std::construct_at(buf_ + num_, std::move(value));
  344. ++num_;
  345. }
  346. // Fills `fill_count` entries with `default_value`, increasing the size
  347. // respectively.
  348. auto UninitializedFill(int32_t fill_count, ConstRefType default_value)
  349. -> void {
  350. CARBON_DCHECK(num_ + fill_count <= Capacity());
  351. std::uninitialized_fill_n(buf_ + num_, fill_count, default_value);
  352. num_ += fill_count;
  353. }
  354. auto size() const -> int32_t { return num_; }
  355. private:
  356. // Verify using an `int32_t` for `num_` is sound.
  357. static_assert(Capacity() <= std::numeric_limits<int32_t>::max());
  358. ValueType* buf_;
  359. int32_t num_ = 0;
  360. };
  361. // Converts a raw index into an index into the set of chunks, and an offset
  362. // into that specific chunk. Looks for index overflow in non-optimized builds.
  363. static auto RawIndexToChunkIndices(int32_t index)
  364. -> std::pair<int32_t, int32_t> {
  365. constexpr auto LowBits = Chunk::IndexBits();
  366. // Verify there are no unused bits when indexing up to the `Capacity`. This
  367. // ensures that ids are contiguous values from 0, as if the values were all
  368. // stored in a single array, and allows using the ids to index into other
  369. // arrays.
  370. static_assert((1 << LowBits) == Chunk::Capacity());
  371. // Simple check to make sure nothing went wildly wrong with the `Capacity`,
  372. // and we have some room for a chunk index, and that shifting by the number
  373. // of bits won't be UB in an int32_t.
  374. static_assert(LowBits < 30);
  375. // The index of the chunk is the high bits.
  376. auto chunk = index >> LowBits;
  377. // The index into the chunk is the low bits.
  378. auto pos = index & ((1 << LowBits) - 1);
  379. return {chunk, pos};
  380. }
  381. // Converts an id into an index into the set of chunks, and an offset into
  382. // that specific chunk.
  383. auto IdToChunkIndices(IdType id) const -> std::pair<int32_t, int32_t> {
  384. return RawIndexToChunkIndices(GetRawIndex(id));
  385. }
  386. // Number of elements added to the store. The number should never exceed what
  387. // fits in an `int32_t`, which is checked in non-optimized builds in Add().
  388. int32_t size_ = 0;
  389. IdTag tag_;
  390. // Storage for the `ValueType` objects, indexed by the id. We use a vector of
  391. // chunks of `ValueType` instead of just a vector of `ValueType` so that
  392. // addresses of `ValueType` objects are stable. This allows the rest of the
  393. // toolchain to hold references into `ValueStore` without having to worry
  394. // about invalidation and use-after-free. We ensure at least one Chunk is held
  395. // inline so that in the case where there is only a single Chunk (i.e. small
  396. // files) we can avoid one indirection.
  397. llvm::SmallVector<Chunk, 1> chunks_;
  398. };
  399. } // namespace Carbon
  400. #endif // CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_