value_store_chunk.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_CHUNK_H_
  5. #define CARBON_TOOLCHAIN_BASE_VALUE_STORE_CHUNK_H_
  6. #include <bit>
  7. #include <cstddef>
  8. #include <limits>
  9. #include <memory>
  10. #include <type_traits>
  11. #include <utility>
  12. #include "common/check.h"
  13. #include "llvm/ADT/StringRef.h"
  14. #include "llvm/Support/MemAlloc.h"
  15. #include "toolchain/base/mem_usage.h"
  16. namespace Carbon::Internal {
  17. // Ids which are stored in a `ValueStore` have a `ValueType` which indicates the
  18. // type of value held in the `ValueStore`.
  19. template <class IdT>
  20. concept IdHasValueType = requires { typename IdT::ValueType; };
  21. // The max size of each chunk allocation for `ValueStore`. This is based on TLB
  22. // page sizes for the target platform.
  23. //
  24. // See https://docs.kernel.org/admin-guide/mm/hugetlbpage.html
  25. //
  26. // A 4K chunk size outperforms a 1M chunk size on Linux-x64 and MacOS-arm64 in
  27. // benchmarks and when running file_test.
  28. //
  29. // Linux-x64: x64 CPUs support 4K and 2M page sizes, but we see 1M is slower
  30. // than 4K with tcmalloc in opt builds for our tests.
  31. //
  32. // Mac-arm64: arm64 CPUs support 4K, 8K, 64K, 256K, 1M, 4M and up. Like for
  33. // Linux-x64, 4K outperformed 1M. We didn't try other sizes yet.
  34. //
  35. // TODO: Is there a more optimize size for Mac-arm64? What should Linux-arm64
  36. // and Mac-x64 use? What should Windows use?
  37. //
  38. // TODO: The previous SmallVector<ValueType> seems to outperform 4K chunks (they
  39. // may be slower by up to 5%) in benchmarks. Find ways to make chunking faster.
  40. // Should successive chunks get larger in size? That will greatly complicate
  41. // math for choosing a chunk though.
  42. template <class IdT>
  43. requires(IdHasValueType<IdT>)
  44. static constexpr auto PlatformChunkMaxAllocationBytes() -> int32_t {
  45. #if !defined(NDEBUG) || LLVM_ADDRESS_SANITIZER_BUILD
  46. // Use a small size in unoptimized builds to ensure multiple chunks get used.
  47. // And do the same in ASAN builds to reduce bookkeeping overheads. Using large
  48. // allocations (e.g. 1M+) incurs a 10x runtime cost for our tests under ASAN.
  49. return sizeof(typename IdT::ValueType) * 5;
  50. #else
  51. return 4 * 1024;
  52. #endif
  53. }
  54. // The number of elements stored in each chunk allocation.
  55. //
  56. // The number must be a power of two so that that there are no unused values in
  57. // bits indexing into the allocation.
  58. template <class IdT>
  59. requires(IdHasValueType<IdT>)
  60. constexpr auto PlatformChunkCapacity() -> int32_t {
  61. constexpr auto MaxElements =
  62. PlatformChunkMaxAllocationBytes<IdT>() / sizeof(typename IdT::ValueType);
  63. return std::bit_floor(MaxElements);
  64. }
  65. // The number of bits needed to index each element in a chunk allocation.
  66. template <class IdT>
  67. requires(IdHasValueType<IdT>)
  68. constexpr auto PlatformChunkIndexBits() -> int32_t {
  69. static_assert(PlatformChunkCapacity<IdT>() > 0);
  70. return std::bit_width(uint32_t{PlatformChunkCapacity<IdT>() - 1});
  71. }
  72. // Converts an id into an index into the set of chunks, and an offset into that
  73. // specific chunk. Looks for index overflow in non-optimized builds.
  74. template <typename IdT>
  75. requires(IdHasValueType<IdT>)
  76. inline auto IdToChunkIndices(IdT id) -> std::pair<int32_t, int32_t> {
  77. constexpr auto LowBits = PlatformChunkIndexBits<IdT>();
  78. // Verify there are no unused bits when indexing up to the
  79. // PlatformChunkCapacity(). This ensures that ids are contiguous values
  80. // from 0, as if the values were all stored in a single array, and allows
  81. // using the ids to index into other arrays.
  82. static_assert((1 << LowBits) == PlatformChunkCapacity<IdT>());
  83. // Simple check to make sure nothing went wildly wrong with the
  84. // PlatformChunkCapacity, and we have some room for a chunk index, and
  85. // that shifting by the number of bits won't be UB in an int32_t.
  86. static_assert(LowBits < 30);
  87. // The index of the chunk is the high bits.
  88. auto chunk = id.index >> LowBits;
  89. // The index into the chunk is the low bits.
  90. auto pos = id.index & ((1 << LowBits) - 1);
  91. return {chunk, pos};
  92. }
  93. // A chunk of `ValueType`s which has a fixed capacity, but variable size. Tracks
  94. // the size internally for verifying bounds.
  95. template <typename IdT, class ValueType>
  96. struct ValueStoreChunk {
  97. public:
  98. static constexpr auto Capacity = Internal::PlatformChunkCapacity<IdT>();
  99. static constexpr auto CapacityBytes = Capacity * sizeof(ValueType);
  100. explicit ValueStoreChunk()
  101. : buf_(reinterpret_cast<ValueType*>(
  102. llvm::allocate_buffer(CapacityBytes, alignof(ValueType)))) {}
  103. // Moving leaves nullptr behind in the moved-from object so that the
  104. // destructor is a no-op (preventing double free).
  105. ValueStoreChunk(ValueStoreChunk&& rhs) noexcept
  106. : buf_(std::exchange(rhs.buf_, nullptr)), num_(rhs.num_) {}
  107. auto operator=(ValueStoreChunk&& rhs) noexcept -> ValueStoreChunk& {
  108. buf_ = std::exchange(rhs.buf_, nullptr);
  109. num_ = rhs.num_;
  110. return *this;
  111. }
  112. ~ValueStoreChunk() {
  113. if (buf_) {
  114. if constexpr (!std::is_trivially_destructible_v<ValueType>) {
  115. std::destroy_n(buf_, num_);
  116. }
  117. llvm::deallocate_buffer(buf_, CapacityBytes, alignof(ValueType));
  118. }
  119. }
  120. auto at(int32_t i) -> ValueType& {
  121. CARBON_CHECK(i < num_, "{0}", i);
  122. return buf_[i];
  123. }
  124. auto at(int32_t i) const -> const ValueType& {
  125. CARBON_CHECK(i < num_, "{0}", i);
  126. return buf_[i];
  127. }
  128. auto push(ValueType&& value) -> void {
  129. CARBON_CHECK(num_ < Capacity);
  130. std::construct_at(buf_ + num_, std::move(value));
  131. ++num_;
  132. }
  133. auto size() const -> int32_t { return num_; }
  134. private:
  135. // Verify using an `int32_t` for `num_` is sound.
  136. static_assert(Capacity <= std::numeric_limits<int32_t>::max());
  137. ValueType* buf_;
  138. int32_t num_ = 0;
  139. };
  140. } // namespace Carbon::Internal
  141. #endif // CARBON_TOOLCHAIN_BASE_VALUE_STORE_CHUNK_H_