// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef CARBON_COMMON_RAW_HASHTABLE_H_
#define CARBON_COMMON_RAW_HASHTABLE_H_

#include <algorithm>
#include <concepts>
#include <cstddef>
#include <cstring>
#include <new>
#include <type_traits>
#include <utility>

#include "common/check.h"
#include "common/hashing.h"
#include "common/hashtable_key_context.h"
#include "common/raw_hashtable_metadata_group.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"

// A namespace collecting a set of low-level utilities for building hashtable
// data structures. These should only be used as implementation details of
// higher-level data-structure APIs.
//
// The utilities here use the `KeyContext` provided by
// `hashtable_key_context.h` to support the necessary hashtable operations on
// keys: hashing and comparison. This also serves as the customization point
// for hashtables built on this infrastructure for those operations. See that
// header file for details.
//
// These utilities support hashtables following a *specific* API design
// pattern, and using Small-Size Optimization, or "SSO", when desired. We
// expect there to be three layers to any hashtable design:
//
// - A *view* type: a read-only view of the hashtable contents. This type
//   should be a value type and is expected to be passed by-value in APIs.
//   However, it will have `const`-reference semantics, much like a
//   `std::string_view`. Note that the *entries* will continue to be mutable;
//   it is only the *table* that is read-only.
//
// - A *base* type: a base class type of the actual hashtable, which allows
//   almost all mutable operations but erases any specific SSO buffer size.
//   Because this is a base of the actual hashtable, it is designed to be
//   passed as a non-`const` reference or pointer.
//
// - A *table* type: the actual hashtable, which derives from the base type
//   and adds any desired SSO storage buffer. Beyond the physical storage, it
//   also allows resetting the table to its initial state & allocated size, as
//   well as copying and moving the table.
//
// For complete examples of the API design, see `set.h` for a hashtable-based
// set data structure, and `map.h` for a hashtable-based map data structure. A
// short sketch of the pattern also follows this comment block.
//
// The hashtable design implemented here has several key invariants and design
// elements that are essential to all three of the types above and the
// functionality they provide.
//
// - The underlying hashtable uses [open addressing], a power-of-two table
//   size, and quadratic probing rather than closed addressing and chaining.
//
//   [open addressing]: https://en.wikipedia.org/wiki/Open_addressing
//
// - Each _slot_ in the table corresponds to a key, a value, and one byte of
//   metadata. Each _entry_ is a key and value. The key and value for an entry
//   are stored together.
//
// - The allocated storage is organized into an array of metadata bytes
//   followed by an array of entry storage.
//
// - The metadata byte corresponding to each slot marks that slot as either
//   empty, deleted, or present. When present, the byte also stores a 7-bit
//   tag taken from the hash of the entry's key.
//
// - The storage for an entry is an internal type that should not be exposed
//   to users; only the underlying keys and values should be.
//
// - The hash addressing and probing occurs over *groups* of slots rather than
//   individual entries. When inserting a new entry, it can be added to the
//   group it hashes to as long as that group is not full, and can even
//   replace a slot with a tombstone indicating a previously deleted entry.
//   Only when the group is full will it look at the next group in the probe
//   sequence. As a result, there may be entries in a group where a different
//   group is the start of that entry's probe sequence. Also, when performing
//   a lookup, every group in the probe sequence must be inspected for the
//   lookup key until it is found or a group has an empty slot.
//
// - Groups are scanned rapidly using the one-byte metadata for each entry in
//   the group and CPU instructions that allow comparing all of the metadata
//   for a group in parallel. For more details on the metadata group encoding
//   and scanning, see `raw_hashtable_metadata_group.h`.
//
// - `GroupSize` is a platform-specific, relatively small power of two that
//   fits in some hardware register. However, `MaxGroupSize` is provided as a
//   portable max that is also a power of two. The table storage, whether
//   provided by an SSO buffer or allocated, is required to be a multiple of
//   `MaxGroupSize` to keep the requirement portable but sufficient for all
//   platforms.
//
// - There is *always* an allocated table of some multiple of `MaxGroupSize`.
//   This allows accesses to be branchless. When heap allocated, we
//   pro-actively allocate at least a minimum heap size table. When there is a
//   small-size optimization (SSO) buffer, that provides the initial
//   allocation.
//
// - The table performs a minimal amount of bookkeeping that limits the APIs
//   it can support:
//   - `alloc_size` is the size of the table *allocated* (not *used*), and is
//     always a power of 2 at least as big as `MinAllocatedSize`.
//   - `storage` is a pointer to the storage for the `alloc_size` slots of the
//     table, and never null.
//   - `small_alloc_size` is the maximum `alloc_size` where the table is
//     stored in the object itself instead of separately on the heap. In this
//     case, `storage` points to `small_storage_`.
//   - `growth_budget` is the number of entries that may be added before the
//     table allocation is doubled. It is always
//     `GrowthThresholdForAllocSize(alloc_size)` minus the number of non-empty
//     (filled or deleted) slots. If it ever falls to 0, the table is grown to
//     keep it greater than 0.
//
//   There is also a "moved-from" state, in which `alloc_size` is 0 and
//   `storage` is null, where the table may only be reinitialized or
//   destroyed. Since the table doesn't track the exact number of filled
//   entries, it doesn't support a container-style `size` API.
//
// - There is no direct iterator support because of the complexity of
//   embedding the group-based metadata scanning into an iterator model.
//   Instead, there is just a for-each method that is passed a lambda to
//   observe all entries. The order of this observation is also not
//   guaranteed.
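//
// As a purely illustrative sketch of the three-layer pattern (hypothetical
// names; key-context plumbing and most APIs elided), a set of `int`s could be
// assembled roughly as:
//
//   class MySetBase : protected RawHashtable::BaseImpl<int> {
//    public:
//     auto Insert(int key) -> bool {
//       // `InsertImpl` returns the entry and whether an insert happened.
//       return InsertImpl(key, KeyContextT()).second;
//     }
//
//    protected:
//     using BaseImpl::BaseImpl;
//   };
//
//   template <ssize_t SmallSize>
//   class MySet : public RawHashtable::TableImpl<MySetBase, SmallSize> {};
//
// A corresponding view type would privately derive from `ViewImpl<int>` in
// the same way. The real containers in `set.h` and `map.h` flesh this
// skeleton out.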
namespace Carbon::RawHashtable {

// If allocating storage, allocate a minimum of one cacheline of group metadata
// or a minimum of one group, whichever is larger.
constexpr ssize_t MinAllocatedSize = std::max<ssize_t>(64, MaxGroupSize);

// An entry in the hashtable storage of a `KeyT` and `ValueT` object.
//
// Allows manual construction, destruction, and access to these values so we
// can create arrays of the entries prior to populating them with actual keys
// and values.
template <typename KeyT, typename ValueT>
struct StorageEntry {
  static constexpr bool IsTriviallyDestructible =
      std::is_trivially_destructible_v<KeyT> &&
      std::is_trivially_destructible_v<ValueT>;

  static constexpr bool IsTriviallyRelocatable =
      IsTriviallyDestructible && std::is_trivially_move_constructible_v<KeyT> &&
      std::is_trivially_move_constructible_v<ValueT>;

  auto key() const -> const KeyT& {
    // Ensure we don't need more alignment than available. Inside a method
    // body to apply to the complete type.
    static_assert(
        alignof(StorageEntry) <= MinAllocatedSize,
        "The minimum allocated size turns into the alignment of our array of "
        "storage entries as they follow the metadata byte array.");
    return *std::launder(reinterpret_cast<const KeyT*>(&key_storage));
  }
  auto key() -> KeyT& {
    return const_cast<KeyT&>(const_cast<const StorageEntry*>(this)->key());
  }

  auto value() const -> const ValueT& {
    return *std::launder(reinterpret_cast<const ValueT*>(&value_storage));
  }
  auto value() -> ValueT& {
    return const_cast<ValueT&>(const_cast<const StorageEntry*>(this)->value());
  }

  // We handle destruction and move manually as we only want to expose
  // distinct `KeyT` and `ValueT` subobjects to user code that may need to do
  // in-place construction. As a consequence, this struct only provides the
  // storage and we have to manually manage the construction, move, and
  // destruction of the objects.
  auto Destroy() -> void {
    static_assert(!IsTriviallyDestructible,
                  "Should never instantiate when trivial!");
    key().~KeyT();
    value().~ValueT();
  }

  auto CopyFrom(const StorageEntry& entry) -> void {
    if constexpr (IsTriviallyRelocatable) {
      memcpy(this, &entry, sizeof(StorageEntry));
    } else {
      new (&key_storage) KeyT(entry.key());
      new (&value_storage) ValueT(entry.value());
    }
  }

  // Move from an expiring entry and destroy that entry's key and value.
  // Optimizes to directly use `memcpy` when correct.
  auto MoveFrom(StorageEntry&& entry) -> void {
    if constexpr (IsTriviallyRelocatable) {
      memcpy(this, &entry, sizeof(StorageEntry));
    } else {
      new (&key_storage) KeyT(std::move(entry.key()));
      entry.key().~KeyT();
      new (&value_storage) ValueT(std::move(entry.value()));
      entry.value().~ValueT();
    }
  }

  alignas(KeyT) std::byte key_storage[sizeof(KeyT)];
  alignas(ValueT) std::byte value_storage[sizeof(ValueT)];
};
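
// Illustrative sketch of the manual lifecycle this type requires (the local
// `entry` here is hypothetical; real entries live inside table storage):
//
//   StorageEntry<std::string, int> entry;
//   new (&entry.key_storage) std::string("k");  // placement-construct the key
//   new (&entry.value_storage) int(42);         // ...and the value
//   CARBON_CHECK(entry.key() == "k" && entry.value() == 42);
//   entry.Destroy();  // No destructor runs unless called manually.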

// A specialization of the storage entry for sets without a distinct value
// type. Somewhat duplicative with the key-value version, but C++
// specialization makes doing better difficult.
template <typename KeyT>
struct StorageEntry<KeyT, void> {
  static constexpr bool IsTriviallyDestructible =
      std::is_trivially_destructible_v<KeyT>;

  static constexpr bool IsTriviallyRelocatable =
      IsTriviallyDestructible && std::is_trivially_move_constructible_v<KeyT>;

  auto key() const -> const KeyT& {
    // Ensure we don't need more alignment than available.
    static_assert(
        alignof(StorageEntry) <= MinAllocatedSize,
        "The minimum allocated size turns into the alignment of our array of "
        "storage entries as they follow the metadata byte array.");
    return *std::launder(reinterpret_cast<const KeyT*>(&key_storage));
  }
  auto key() -> KeyT& {
    return const_cast<KeyT&>(const_cast<const StorageEntry*>(this)->key());
  }

  auto Destroy() -> void {
    static_assert(!IsTriviallyDestructible,
                  "Should never instantiate when trivial!");
    key().~KeyT();
  }

  auto CopyFrom(const StorageEntry& entry) -> void {
    if constexpr (IsTriviallyRelocatable) {
      memcpy(this, &entry, sizeof(StorageEntry));
    } else {
      new (&key_storage) KeyT(entry.key());
    }
  }

  auto MoveFrom(StorageEntry&& entry) -> void {
    if constexpr (IsTriviallyRelocatable) {
      memcpy(this, &entry, sizeof(StorageEntry));
    } else {
      new (&key_storage) KeyT(std::move(entry.key()));
      entry.key().~KeyT();
    }
  }

  alignas(KeyT) std::byte key_storage[sizeof(KeyT)];
};

// A placeholder empty type used to model pointers to the allocated buffer of
// storage.
//
// The allocated storage doesn't have a meaningful static layout -- it consists
// of an array of metadata groups followed by an array of storage entries.
// However, we want to be able to mark pointers to this and so use pointers to
// this placeholder type as that signifier.
//
// This is a complete, empty type so that it can be used as a base class of a
// specific concrete storage type for compile-time sized storage.
struct Storage {};
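
// For orientation, an illustrative layout example (the sizes are assumed, not
// normative): with `alloc_size == 64` and `sizeof(EntryT) == 16`, the
// allocation holds the 64 metadata bytes at byte offsets [0, 64), followed by
// the entry array starting at byte offset 64 (see `EntriesOffset` below,
// which evaluates to exactly `alloc_size`), for a total of 64 + 64 * 16
// bytes.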

// Forward declaration to support friending, see the definition below.
template <typename KeyT, typename ValueT = void,
          typename InputKeyContextT = DefaultKeyContext>
class BaseImpl;

// Implementation helper for defining a read-only view type for a hashtable.
//
// A specific user-facing hashtable view type should derive privately from
// this type, and forward the implementation of its interface to functions in
// this type.
//
// The methods available to user-facing hashtable types are `protected`, and
// where they are expected to directly map to a public API, they are named
// with an `Impl` suffix. The suffix naming ensures types don't `using` in
// these low-level APIs but declare their own and implement them by forwarding
// to these APIs. We don't want users to have to read these implementation
// details to understand their container's API, so none of these methods
// should be `using`-ed into the user-facing types.
//
// Some of the types are just convenience aliases and aren't important to
// surface as part of the user-facing type API for readers, and so those are
// reasonable to add via a `using`.
//
// Some methods are used by other parts of the raw hashtable implementation.
// Those are kept `private` and where necessary the other components of the
// raw hashtable implementation are friended to give access to them.
template <typename InputKeyT, typename InputValueT = void,
          typename InputKeyContextT = DefaultKeyContext>
class ViewImpl {
 protected:
  using KeyT = InputKeyT;
  using ValueT = InputValueT;
  using KeyContextT = InputKeyContextT;
  using EntryT = StorageEntry<KeyT, ValueT>;

  friend class BaseImpl<KeyT, ValueT, KeyContextT>;

  // Make more-`const` types friends to enable conversions that add `const`.
  friend class ViewImpl<const KeyT, ValueT, KeyContextT>;
  friend class ViewImpl<KeyT, const ValueT, KeyContextT>;
  friend class ViewImpl<const KeyT, const ValueT, KeyContextT>;

  ViewImpl() = default;

  // Support adding `const` to either key or value type of some other view.
  template <typename OtherKeyT, typename OtherValueT>
  // NOLINTNEXTLINE(google-explicit-constructor)
  ViewImpl(ViewImpl<OtherKeyT, OtherValueT, KeyContextT> other_view)
    requires(std::same_as<KeyT, OtherKeyT> ||
             std::same_as<KeyT, const OtherKeyT>) &&
            (std::same_as<ValueT, OtherValueT> ||
             std::same_as<ValueT, const OtherValueT>)
      : alloc_size_(other_view.alloc_size_), storage_(other_view.storage_) {}

  // Looks up an entry in the hashtable and returns its address or null if not
  // present.
  template <typename LookupKeyT>
  auto LookupEntry(LookupKeyT lookup_key, KeyContextT key_context) const
      -> EntryT*;

  // Calls `entry_callback` for each entry in the hashtable. All the entries
  // within a specific group are visited first, and then `group_callback` is
  // called on the group itself. The `group_callback` is typically only used
  // by the internals of the hashtable.
  template <typename EntryCallbackT, typename GroupCallbackT>
  auto ForEachEntry(EntryCallbackT entry_callback,
                    GroupCallbackT group_callback) const -> void;

  // Counts the number of keys in the hashtable that required probing beyond
  // the initial group.
  auto CountProbedKeys(KeyContextT key_context) const -> ssize_t;

 private:
  ViewImpl(ssize_t alloc_size, Storage* storage)
      : alloc_size_(alloc_size), storage_(storage) {}

  // Computes the offset from the metadata array to the entries array for a
  // given size. This is trivial, but we use this routine to enforce
  // invariants on the sizes.
  static constexpr auto EntriesOffset(ssize_t alloc_size) -> ssize_t {
    CARBON_DCHECK(llvm::isPowerOf2_64(alloc_size))
        << "Size must be a power of two for a hashed buffer!";
    // The size is always a power of two. We prevent any too-small sizes so it
    // being a power of two provides the needed alignment. As a result, the
    // offset is exactly the size. We validate this here to catch alignment
    // bugs early.
    CARBON_DCHECK(static_cast<uint64_t>(alloc_size) ==
                  llvm::alignTo<alignof(EntryT)>(alloc_size));
    return alloc_size;
  }

  auto metadata() const -> uint8_t* {
    return reinterpret_cast<uint8_t*>(storage_);
  }
  auto entries() const -> EntryT* {
    return reinterpret_cast<EntryT*>(reinterpret_cast<std::byte*>(storage_) +
                                     EntriesOffset(alloc_size_));
  }

  ssize_t alloc_size_;
  Storage* storage_;
};

// Implementation helper for defining a read-write base type for a hashtable
// that type-erases any SSO buffer.
//
// A specific user-facing hashtable base type should derive using
// *`protected`* inheritance from this type, and forward the implementation of
// its interface to functions in this type.
//
// Other than the use of `protected` inheritance, the patterns for this type,
// and how to build user-facing hashtable base types from it, mirror those of
// `ViewImpl`. See its documentation for more details.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
class BaseImpl {
 protected:
  using KeyT = InputKeyT;
  using ValueT = InputValueT;
  using KeyContextT = InputKeyContextT;
  using ViewImplT = ViewImpl<KeyT, ValueT, KeyContextT>;
  using EntryT = typename ViewImplT::EntryT;

  BaseImpl(int small_alloc_size, Storage* small_storage)
      : small_alloc_size_(small_alloc_size) {
    CARBON_CHECK(small_alloc_size >= 0);
    Construct(small_storage);
  }
  // Only used for copying and moving, and leaves storage uninitialized.
  BaseImpl(ssize_t alloc_size, int growth_budget, int small_alloc_size)
      : view_impl_(alloc_size, nullptr),
        growth_budget_(growth_budget),
        small_alloc_size_(small_alloc_size) {}

  ~BaseImpl();

  // NOLINTNEXTLINE(google-explicit-constructor): Designed to implicitly decay.
  operator ViewImplT() const { return view_impl(); }

  auto view_impl() const -> ViewImplT { return view_impl_; }

  // Looks up the provided key in the hashtable. If found, returns a pointer
  // to that entry and `false`.
  //
  // If not found, will locate an empty entry for inserting into, set the
  // metadata for that entry, and return a pointer to the entry and `true`.
  // When necessary, this will grow the hashtable to cause there to be
  // sufficient empty entries.
  template <typename LookupKeyT>
  auto InsertImpl(LookupKeyT lookup_key, KeyContextT key_context)
      -> std::pair<EntryT*, bool>;

  // Looks up the entry in the hashtable, and if found destroys the entry and
  // returns `true`. If not found, returns `false`.
  //
  // Does not release any memory, just leaves a tombstone behind so this entry
  // cannot be found and the slot can in theory be re-used.
  template <typename LookupKeyT>
  auto EraseImpl(LookupKeyT lookup_key, KeyContextT key_context) -> bool;

  // Erases all entries in the hashtable but leaves the allocated storage.
  auto ClearImpl() -> void;

 private:
  template <typename InputBaseT, ssize_t SmallSize>
  friend class TableImpl;

  static constexpr ssize_t Alignment = std::max<ssize_t>(
      {alignof(MetadataGroup), alignof(StorageEntry<KeyT, ValueT>)});

  // Implementation of inline small storage for the provided key type, value
  // type, and small size. Specialized for a zero small size to be an empty
  // struct.
  template <ssize_t SmallSize>
  struct SmallStorage : Storage {
    alignas(Alignment) uint8_t metadata[SmallSize];
    mutable StorageEntry<KeyT, ValueT> entries[SmallSize];
  };
  // Specialized storage with no inline buffer to avoid any extra alignment.
  template <>
  struct SmallStorage<0> {};

  static constexpr auto AllocByteSize(ssize_t alloc_size) -> ssize_t {
    return ViewImplT::EntriesOffset(alloc_size) + sizeof(EntryT) * alloc_size;
  }

  static auto Allocate(ssize_t alloc_size) -> Storage*;
  static auto Deallocate(Storage* storage, ssize_t alloc_size) -> void;

  auto growth_budget() const -> ssize_t { return growth_budget_; }
  auto alloc_size() const -> ssize_t { return view_impl_.alloc_size_; }
  auto alloc_size() -> ssize_t& { return view_impl_.alloc_size_; }
  auto storage() const -> Storage* { return view_impl_.storage_; }
  auto storage() -> Storage*& { return view_impl_.storage_; }
  auto metadata() const -> uint8_t* { return view_impl_.metadata(); }
  auto entries() const -> EntryT* { return view_impl_.entries(); }
  auto small_alloc_size() const -> ssize_t {
    return static_cast<unsigned>(small_alloc_size_);
  }
  auto is_small() const -> bool { return alloc_size() <= small_alloc_size(); }

  auto Construct(Storage* small_storage) -> void;
  auto Destroy() -> void;

  template <typename LookupKeyT>
  auto InsertIntoEmpty(LookupKeyT lookup_key, KeyContextT key_context)
      -> EntryT*;

  static auto ComputeNextAllocSize(ssize_t old_alloc_size) -> ssize_t;
  static auto GrowthThresholdForAllocSize(ssize_t alloc_size) -> ssize_t;

  template <typename LookupKeyT>
  auto GrowAndInsert(LookupKeyT lookup_key, KeyContextT key_context)
      -> EntryT*;

  ViewImplT view_impl_;
  int growth_budget_;
  int small_alloc_size_;
};

// Implementation helper for defining a hashtable type with an SSO buffer.
//
// A specific user-facing hashtable should derive privately from this type,
// and forward the implementation of its interface to functions in this type.
// It should provide the corresponding user-facing hashtable base type as the
// `InputBaseT` type parameter (rather than a key/value pair), and this type
// will in turn derive from that provided base type. This allows
// derived-to-base conversion from the user-facing hashtable type to the
// user-facing hashtable base type. And it does so keeping the inheritance
// linear. The resulting linear inheritance hierarchy for a `Map<K, T>` type
// will look like:
//
//   Map<K, T>
//    ↓
//   TableImpl<MapBase<K, T>>
//    ↓
//   MapBase<K, T>
//    ↓
//   BaseImpl<K, T>
//
// Other than this inheritance technique, the patterns for this type, and how
// to build user-facing hashtable types from it, mirror those of `ViewImpl`.
// See its documentation for more details.
template <typename InputBaseT, ssize_t SmallSize>
class TableImpl : public InputBaseT {
 protected:
  using BaseT = InputBaseT;

  TableImpl() : BaseT(SmallSize, small_storage()) {}
  TableImpl(const TableImpl& arg);
  TableImpl(TableImpl&& arg) noexcept;

  // Resets the hashtable to its initial state, clearing all entries and
  // releasing all memory. If the hashtable had an SSO buffer, that is
  // restored as the storage. Otherwise, a minimum sized table storage is
  // allocated.
  auto ResetImpl() -> void;

 private:
  using KeyT = BaseT::KeyT;
  using ValueT = BaseT::ValueT;
  using EntryT = BaseT::EntryT;
  using SmallStorage = BaseT::template SmallStorage<SmallSize>;

  auto small_storage() const -> Storage*;

  [[no_unique_address]] mutable SmallStorage small_storage_;
};

////////////////////////////////////////////////////////////////////////////////
//
// Only implementation details below this point.
//
////////////////////////////////////////////////////////////////////////////////

// Computes a seed that provides a small amount of entropy from ASLR where
// available with minimal cost. The priority is speed, and this computes the
// entropy in a way that doesn't require loading from memory, merely accessing
// entropy already available without accessing memory.
inline auto ComputeSeed() -> uint64_t {
  // A global variable whose address is used as a seed. This allows ASLR to
  // introduce some variation in hashtable ordering when enabled via the code
  // model for globals.
  extern volatile std::byte global_addr_seed;

  return reinterpret_cast<uint64_t>(&global_addr_seed);
}

inline auto ComputeProbeMaskFromSize(ssize_t size) -> size_t {
  CARBON_DCHECK(llvm::isPowerOf2_64(size))
      << "Size must be a power of two for a hashed buffer!";
  // Since `size` is a power of two, we can make sure the probes are less
  // than `size` by making the mask `size - 1`. We also mask off the low
  // bits so the probes are a multiple of the size of the groups of entries.
  return (size - 1) & ~GroupMask;
}
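
// For example (illustrative only, and assuming `GroupSize` is 16 so that
// `GroupMask` is 15): with `size == 64` the mask is
// `(64 - 1) & ~15 == 0b110000`, so masked probe indices can only be 0, 16,
// 32, or 48 -- the group-aligned starting offsets within the table.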

// This class handles building a sequence of probe indices from a given
// starting point, including both the quadratic growth and masking the index
// to stay within the bucket array size. The starting point doesn't need to be
// clamped to the size ahead of time (or even be positive), we will do it
// internally.
//
// For reference on quadratic probing:
// https://en.wikipedia.org/wiki/Quadratic_probing
//
// We compute the quadratic probe index incrementally, but we can also compute
// it mathematically and will check that the incremental result matches our
// mathematical expectation. We use the quadratic probing formula of:
//
//   p(start, step) = (start + (step + step^2) / 2) (mod size / GroupSize)
//
// However, we compute it incrementally and scale all the variables by the
// group size so it can be used as an index without an additional
// multiplication.
class ProbeSequence {
 public:
  ProbeSequence(ssize_t start, ssize_t size) {
    mask_ = ComputeProbeMaskFromSize(size);
    p_ = start & mask_;
#ifndef NDEBUG
    start_ = start & mask_;
    size_ = size;
#endif
  }

  void Next() {
    step_ += GroupSize;
    p_ = (p_ + step_) & mask_;
#ifndef NDEBUG
    // Verify against the quadratic formula we expect to be following by
    // scaling everything down by `GroupSize`.
    CARBON_DCHECK(
        (p_ / GroupSize) ==
        ((start_ / GroupSize +
          (step_ / GroupSize + (step_ / GroupSize) * (step_ / GroupSize)) /
              2) %
         (size_ / GroupSize)))
        << "Index in probe sequence does not match the expected formula.";
    CARBON_DCHECK(step_ < size_) << "We necessarily visit all groups, so we "
                                    "can't have more probe steps than groups.";
#endif
  }

  auto index() const -> ssize_t { return p_; }

 private:
  ssize_t step_ = 0;
  size_t mask_;
  ssize_t p_;
#ifndef NDEBUG
  ssize_t start_;
  ssize_t size_;
#endif
};
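
// As a worked example (illustrative only, again assuming `GroupSize` is 16):
// for `size == 64` and `start == 5`, the initial index is
// `5 & 0b110000 == 0`. Each `Next()` grows the step by one group, so the
// sequence visits group 16 (step 16), group 48 (step 32), and then
// `(48 + 48) & 0b110000 == 32` (step 48), touching each of the four groups
// exactly once before the debug check on `step_ < size_` would fire.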

// TODO: Evaluate keeping this outlined to see if macro benchmarks observe the
// same perf hit as micro benchmarks.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::LookupEntry(
    LookupKeyT lookup_key, KeyContextT key_context) const -> EntryT* {
  // Prefetch with a "low" temporal locality as we're primarily expecting a
  // brief use of the storage and then to return to application code.
  __builtin_prefetch(storage_, /*read*/ 0, /*low-locality*/ 1);

  ssize_t local_size = alloc_size_;
  CARBON_DCHECK(local_size > 0);

  uint8_t* local_metadata = metadata();
  HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
  auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();

  EntryT* local_entries = entries();

  // Walk through groups of entries using a quadratic probe starting from
  // `hash_index`.
  ProbeSequence s(hash_index, local_size);
  do {
    ssize_t group_index = s.index();

    // For each group, match the tag against the metadata to extract the
    // potentially matching entries within the group.
    MetadataGroup g = MetadataGroup::Load(local_metadata, group_index);
    auto metadata_matched_range = g.Match(tag);
    if (LLVM_LIKELY(metadata_matched_range)) {
      // If any entries in this group potentially match based on their
      // metadata, walk each candidate and compare its key to see if we have
      // definitively found a match.
      EntryT* group_entries = &local_entries[group_index];
      auto byte_it = metadata_matched_range.begin();
      auto byte_end = metadata_matched_range.end();
      do {
        EntryT* entry = byte_it.index_ptr(group_entries);
        if (LLVM_LIKELY(key_context.KeyEq(lookup_key, entry->key()))) {
          __builtin_assume(entry != nullptr);
          return entry;
        }
        ++byte_it;
      } while (LLVM_UNLIKELY(byte_it != byte_end));
    }

    // We failed to find a matching entry in this bucket, so check if there
    // are empty slots as that indicates we're done probing -- no later probed
    // index could have a match.
    auto empty_byte_matched_range = g.MatchEmpty();
    if (LLVM_LIKELY(empty_byte_matched_range)) {
      return nullptr;
    }

    s.Next();

    // We use a weird construct of an "unlikely" condition of `true`. The goal
    // is to get the compiler to not prioritize the back edge of the loop for
    // code layout, and in at least some tests this seems to be an effective
    // construct for achieving this.
  } while (LLVM_UNLIKELY(true));
}

// Note that we force inlining here because we expect to be called with
// lambdas that will in turn be inlined to form the loop body. We don't want
// function boundaries within the loop for performance, and the degree of
// simplification from inlining these callbacks may be difficult for the
// compiler to recognize automatically.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename EntryCallbackT, typename GroupCallbackT>
[[clang::always_inline]] auto
ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::ForEachEntry(
    EntryCallbackT entry_callback, GroupCallbackT group_callback) const
    -> void {
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();

  ssize_t local_size = alloc_size_;
  for (ssize_t group_index = 0; group_index < local_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(local_metadata, group_index);
    auto present_matched_range = g.MatchPresent();
    if (!present_matched_range) {
      continue;
    }
    for (ssize_t byte_index : present_matched_range) {
      entry_callback(local_entries[group_index + byte_index]);
    }

    group_callback(&local_metadata[group_index]);
  }
}

template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::CountProbedKeys(
    KeyContextT key_context) const -> ssize_t {
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();
  ssize_t local_size = alloc_size_;
  ssize_t count = 0;
  for (ssize_t group_index = 0; group_index < local_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(local_metadata, group_index);
    auto present_matched_range = g.MatchPresent();
    for (ssize_t byte_index : present_matched_range) {
      ssize_t index = group_index + byte_index;
      HashCode hash =
          key_context.HashKey(local_entries[index].key(), ComputeSeed());
      ssize_t hash_index = hash.ExtractIndexAndTag<7>().first &
                           ComputeProbeMaskFromSize(local_size);
      count += static_cast<ssize_t>(hash_index != group_index);
    }
  }
  return count;
}

template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::~BaseImpl() {
  Destroy();
}

// TODO: Evaluate whether it is worth forcing this out-of-line given the
// reasonable ABI boundary it forms and large volume of code necessary to
// implement it.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::InsertImpl(
    LookupKeyT lookup_key, KeyContextT key_context)
    -> std::pair<EntryT*, bool> {
  CARBON_DCHECK(alloc_size() > 0);

  uint8_t* local_metadata = metadata();

  HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
  auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();

  // The index of the first group with a deleted slot along the probe
  // sequence, if any, along with its match. Tracked so we can insert over a
  // tombstone rather than extending the probe chain.
  ssize_t group_with_deleted_index;
  MetadataGroup::MatchIndex deleted_match = {};

  EntryT* local_entries = entries();

  auto return_insert_at_index =
      [&](ssize_t index) -> std::pair<EntryT*, bool> {
    // We'll need to insert at this index so set the control group byte to the
    // proper value.
    local_metadata[index] = tag | MetadataGroup::PresentMask;
    return {&local_entries[index], true};
  };

  for (ProbeSequence s(hash_index, alloc_size());; s.Next()) {
    ssize_t group_index = s.index();
    auto g = MetadataGroup::Load(local_metadata, group_index);

    auto control_byte_matched_range = g.Match(tag);
    if (control_byte_matched_range) {
      EntryT* group_entries = &local_entries[group_index];
      auto byte_it = control_byte_matched_range.begin();
      auto byte_end = control_byte_matched_range.end();
      do {
        EntryT* entry = byte_it.index_ptr(group_entries);
        if (LLVM_LIKELY(key_context.KeyEq(lookup_key, entry->key()))) {
          return {entry, false};
        }
        ++byte_it;
      } while (LLVM_UNLIKELY(byte_it != byte_end));
    }

    // Track the first group with a deleted entry that we could insert over.
    if (!deleted_match) {
      deleted_match = g.MatchDeleted();
      group_with_deleted_index = group_index;
    }

    // We failed to find a matching entry in this bucket, so check if there
    // are no empty slots. In that case, we'll continue probing.
    auto empty_match = g.MatchEmpty();
    if (!empty_match) {
      continue;
    }

    // Ok, we've finished probing without finding anything and need to insert
    // instead.

    // If we found a deleted slot, we don't need the probe sequence to insert
    // so just bail. We want to ensure building up a table is fast so we
    // de-prioritize this a bit. In practice this doesn't have too much of an
    // effect.
    if (LLVM_UNLIKELY(deleted_match)) {
      return return_insert_at_index(group_with_deleted_index +
                                    deleted_match.index());
    }

    // We're going to need to grow by inserting into an empty slot. Check that
    // we have the budget for that before we compute the exact index of the
    // empty slot. Without the growth budget we'll have to completely rehash
    // and so we can just bail here.
    if (LLVM_UNLIKELY(growth_budget_ == 0)) {
      return {GrowAndInsert(lookup_key, key_context), true};
    }

    --growth_budget_;
    CARBON_DCHECK(growth_budget() >= 0)
        << "Growth budget shouldn't have gone negative!";
    return return_insert_at_index(group_index + empty_match.index());
  }

  CARBON_FATAL() << "We should never finish probing without finding the entry "
                    "or an empty slot.";
}

template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::EraseImpl(
    LookupKeyT lookup_key, KeyContextT key_context) -> bool {
  EntryT* entry = view_impl_.LookupEntry(lookup_key, key_context);
  if (!entry) {
    return false;
  }

  // If there are empty slots in this group then nothing will probe past this
  // group looking for an entry so we can simply set this slot to empty as
  // well. However, if every slot in this group is full, it might be part of
  // a long probe chain that we can't disrupt. In that case we mark the slot's
  // metadata as deleted to keep probes continuing past it.
  //
  // If we mark the slot as empty, we'll also need to increase the growth
  // budget.
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();
  ssize_t index = entry - local_entries;
  ssize_t group_index = index & ~GroupMask;
  auto g = MetadataGroup::Load(local_metadata, group_index);
  auto empty_matched_range = g.MatchEmpty();
  if (empty_matched_range) {
    local_metadata[index] = MetadataGroup::Empty;
    ++growth_budget_;
  } else {
    local_metadata[index] = MetadataGroup::Deleted;
  }

  if constexpr (!EntryT::IsTriviallyDestructible) {
    entry->Destroy();
  }

  return true;
}

template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::ClearImpl() -> void {
  view_impl_.ForEachEntry(
      [](EntryT& entry) {
        if constexpr (!EntryT::IsTriviallyDestructible) {
          entry.Destroy();
        }
      },
      [](uint8_t* metadata_group) {
        // Clear the group.
        std::memset(metadata_group, 0, GroupSize);
      });
  growth_budget_ = GrowthThresholdForAllocSize(alloc_size());
}

// Allocates the appropriate memory layout for a table of the given
// `alloc_size`, with space both for the metadata array and entries.
//
// The returned pointer *must* be deallocated by calling the below `Deallocate`
// function with the same `alloc_size` as used here.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Allocate(
    ssize_t alloc_size) -> Storage* {
  return reinterpret_cast<Storage*>(__builtin_operator_new(
      AllocByteSize(alloc_size), static_cast<std::align_val_t>(Alignment),
      std::nothrow_t()));
}

// Deallocates a table's storage that was allocated with the `Allocate`
// function.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Deallocate(
    Storage* storage, ssize_t alloc_size) -> void {
  ssize_t allocated_size = AllocByteSize(alloc_size);
  // We don't need the size, but make sure it always compiles.
  static_cast<void>(allocated_size);
  __builtin_operator_delete(storage,
#if __cpp_sized_deallocation
                            allocated_size,
#endif
                            static_cast<std::align_val_t>(Alignment));
}

// Construct a table using the provided small storage if `small_alloc_size_`
// is non-zero. If `small_alloc_size_` is zero, then `small_storage` won't be
// used and can be null. Regardless, after this the storage pointer is
// non-null and the size is non-zero so that we can directly begin inserting
// or querying the table.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Construct(
    Storage* small_storage) -> void {
  if (small_alloc_size_ > 0) {
    alloc_size() = small_alloc_size_;
    storage() = small_storage;
  } else {
    // Directly allocate the initial buffer so that the hashtable is never in
    // an empty state.
    alloc_size() = MinAllocatedSize;
    storage() = Allocate(MinAllocatedSize);
  }
  std::memset(metadata(), 0, alloc_size());
  growth_budget_ = GrowthThresholdForAllocSize(alloc_size());
}

// Destroy the current table, releasing any memory used.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Destroy() -> void {
  // Check for a moved-from state and don't do anything. Only a moved-from
  // table has a zero size.
  if (alloc_size() == 0) {
    return;
  }

  // Destroy all the entries.
  if constexpr (!EntryT::IsTriviallyDestructible) {
    view_impl_.ForEachEntry([](EntryT& entry) { entry.Destroy(); },
                            [](auto...) {});
  }

  // If small, nothing to deallocate.
  if (is_small()) {
    return;
  }

  // Just deallocate the storage without updating anything when destroying the
  // object.
  Deallocate(storage(), alloc_size());
}

// Optimized routine to insert a key into a table when that key *definitely*
// isn't present in the table and the table *definitely* has a viable empty
// slot (and growth space) to insert into before any deleted slots. When both
// of these are true, typically just after growth, we can dramatically
// simplify the insert position search.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
[[clang::noinline]] auto
BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::InsertIntoEmpty(
    LookupKeyT lookup_key, KeyContextT key_context) -> EntryT* {
  HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
  auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();

  for (ProbeSequence s(hash_index, alloc_size());; s.Next()) {
    ssize_t group_index = s.index();
    auto g = MetadataGroup::Load(local_metadata, group_index);

    if (auto empty_match = g.MatchEmpty()) {
      ssize_t index = group_index + empty_match.index();
      local_metadata[index] = tag | MetadataGroup::PresentMask;
      return &local_entries[index];
    }

    // Otherwise we continue probing.
  }
}

// Apply our doubling growth strategy and (re-)check invariants around table
// size.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::ComputeNextAllocSize(
    ssize_t old_alloc_size) -> ssize_t {
  CARBON_DCHECK(llvm::isPowerOf2_64(old_alloc_size))
      << "Expected a power of two!";
  ssize_t new_alloc_size;
  bool overflow = __builtin_mul_overflow(old_alloc_size, 2, &new_alloc_size);
  CARBON_CHECK(!overflow) << "Computing the new size overflowed `ssize_t`!";
  return new_alloc_size;
}

// Compute the growth threshold for a given size.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT,
              InputKeyContextT>::GrowthThresholdForAllocSize(ssize_t alloc_size)
    -> ssize_t {
  // We use a 7/8ths load factor to trigger growth.
  return alloc_size - alloc_size / 8;
}
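
// For instance (purely illustrative): a table with `alloc_size == 64` gets a
// threshold of `64 - 64 / 8 == 56`, so at most 56 of its 64 slots may be
// filled or tombstoned before `InsertImpl` exhausts `growth_budget_` and
// doubles the table via `GrowAndInsert`.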
  892. // Grow the hashtable to create space and then insert into it. Returns the
  893. // selected insertion entry. Never returns null. In addition to growing and
  894. // selecting the insertion entry, this routine updates the metadata array so
  895. // that this function can be directly called and the result returned from
  896. // `InsertImpl`.
  897. template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
  898. template <typename LookupKeyT>
  899. [[clang::noinline]] auto
  900. BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::GrowAndInsert(
  901. LookupKeyT lookup_key, KeyContextT key_context) -> EntryT* {
  902. // We collect the probed elements in a small vector for re-insertion. It is
  903. // tempting to reuse the already allocated storage, but doing so appears to
  904. // be a (very slight) performance regression. These are relatively rare and
  905. // storing them into the existing storage creates stores to the same regions
  906. // of memory we're reading. Moreover, it requires moving both the key and the
  907. // value twice, and doing the `memcpy` widening for relocatable types before
  908. // the group walk rather than after the group walk. In practice, between the
  909. // statistical rareness and using a large small size buffer here on the stack,
  910. // we can handle this most efficiently with temporary, additional storage.
  911. llvm::SmallVector<ssize_t, 128> probed_indices;
  912. // We grow into a new `MapBase` so that both the new and old maps are
  913. // fully functional until all the entries are moved over. However, we directly
  914. // manipulate the internals to short circuit many aspects of the growth.
  915. ssize_t old_size = alloc_size();
  916. CARBON_DCHECK(old_size > 0);
  917. CARBON_DCHECK(growth_budget_ == 0);
  918. bool old_small = is_small();
  919. Storage* old_storage = storage();
  920. uint8_t* old_metadata = metadata();
  921. EntryT* old_entries = entries();
  922. #ifndef NDEBUG
  923. // Count how many of the old table slots will end up being empty after we grow
  924. // the table. This is both the currently empty slots, but also the deleted
  925. // slots because we clear them to empty and re-insert everything that had any
  926. // probing.
  927. ssize_t debug_empty_count =
  928. llvm::count(llvm::ArrayRef(old_metadata, old_size), MetadataGroup::Empty);
  929. ssize_t debug_deleted_count = llvm::count(
  930. llvm::ArrayRef(old_metadata, old_size), MetadataGroup::Deleted);
  931. CARBON_DCHECK(debug_empty_count >=
  932. (old_size - GrowthThresholdForAllocSize(old_size)))
  933. << "debug_empty_count: " << debug_empty_count
  934. << ", debug_deleted_count: " << debug_deleted_count
  935. << ", size: " << old_size;
  936. #endif
  937. // Compute the new size and grow the storage in place (if possible).
  938. ssize_t new_size = ComputeNextAllocSize(old_size);
  939. alloc_size() = new_size;
  940. storage() = Allocate(new_size);
  941. growth_budget_ = GrowthThresholdForAllocSize(new_size);
  942. // Now extract the new components of the table.
  943. uint8_t* new_metadata = metadata();
  944. EntryT* new_entries = entries();
  945. // We always double the size when we grow. This allows an important
  946. // optimization -- we're adding exactly one more high bit to the hash-computed
  947. // index for each entry. This in turn means we can classify every entry in the
  948. // table into three cases:
  949. //
  950. // 1) The new high bit is zero, the entry is at the same index in the new
  951. // table as the old.
  952. //
  953. // 2) The new high bit is one, the entry is at the old index plus the old
  954. // size.
  955. //
  956. // 3) The entry's current index doesn't match the initial hash index because
  957. // it required some amount of probing to find an empty slot.
  958. //
  959. // The design of the hash table tries to minimize how many entries fall into
  960. // case (3), so we expect the vast majority of entries to be in (1) or (2).
  961. // This lets us model growth notionally as duplicating the hash table,
  962. // clearing out the empty slots, and inserting any probed elements.
  963. ssize_t count = 0;
  964. for (ssize_t group_index = 0; group_index < old_size;
  965. group_index += GroupSize) {
  966. auto low_g = MetadataGroup::Load(old_metadata, group_index);
  967. // Make sure to match present elements first to enable pipelining with
  968. // clearing.
  969. auto present_matched_range = low_g.MatchPresent();
  970. low_g.ClearDeleted();
  971. MetadataGroup high_g;
  972. if constexpr (MetadataGroup::FastByteClear) {
  973. // When we have a fast byte clear, we can update the metadata for the
  974. // growth in-register and store at the end.
  975. high_g = low_g;
  976. } else {
  977. // If we don't have a fast byte clear, we can store the metadata group
  978. // eagerly here and overwrite bytes with a byte store below instead of
  979. // clearing the byte in-register.
  980. low_g.Store(new_metadata, group_index);
  981. low_g.Store(new_metadata, group_index | old_size);
  982. }
  983. for (ssize_t byte_index : present_matched_range) {
  984. ++count;
  985. ssize_t old_index = group_index + byte_index;
  986. if constexpr (!MetadataGroup::FastByteClear) {
  987. CARBON_DCHECK(new_metadata[old_index] == old_metadata[old_index]);
  988. CARBON_DCHECK(new_metadata[old_index | old_size] ==
  989. old_metadata[old_index]);
  990. }
  991. HashCode hash =
  992. key_context.HashKey(old_entries[old_index].key(), ComputeSeed());
  993. ssize_t old_hash_index = hash.ExtractIndexAndTag<7>().first &
  994. ComputeProbeMaskFromSize(old_size);
  995. if (LLVM_UNLIKELY(old_hash_index != group_index)) {
  996. probed_indices.push_back(old_index);
  997. if constexpr (MetadataGroup::FastByteClear) {
  998. low_g.ClearByte(byte_index);
  999. high_g.ClearByte(byte_index);
  1000. } else {
  1001. new_metadata[old_index] = MetadataGroup::Empty;
  1002. new_metadata[old_index | old_size] = MetadataGroup::Empty;
  1003. }
  1004. continue;
  1005. }
  1006. ssize_t new_index = hash.ExtractIndexAndTag<7>().first &
  1007. ComputeProbeMaskFromSize(new_size);
  1008. CARBON_DCHECK(new_index == old_hash_index ||
  1009. new_index == (old_hash_index | old_size));
  1010. // Toggle the newly added bit of the index to get to the other possible
  1011. // target index.
      if constexpr (MetadataGroup::FastByteClear) {
        (new_index == old_hash_index ? high_g : low_g).ClearByte(byte_index);
        new_index += byte_index;
      } else {
        new_index += byte_index;
        new_metadata[new_index ^ old_size] = MetadataGroup::Empty;
      }

      // If we need to explicitly move (and destroy) the key or value, do so
      // here where we already know its target.
      if constexpr (!EntryT::IsTriviallyRelocatable) {
        new_entries[new_index].MoveFrom(std::move(old_entries[old_index]));
      }
    }
    if constexpr (MetadataGroup::FastByteClear) {
      low_g.Store(new_metadata, group_index);
      high_g.Store(new_metadata, (group_index | old_size));
    }
  }
  CARBON_DCHECK((count - static_cast<ssize_t>(probed_indices.size())) ==
                (new_size - llvm::count(llvm::ArrayRef(new_metadata, new_size),
                                        MetadataGroup::Empty)));
#ifndef NDEBUG
  CARBON_DCHECK((debug_empty_count + debug_deleted_count) ==
                (old_size - count));
  CARBON_DCHECK(llvm::count(llvm::ArrayRef(new_metadata, new_size),
                            MetadataGroup::Empty) ==
                debug_empty_count + debug_deleted_count +
                    static_cast<ssize_t>(probed_indices.size()) + old_size);
#endif
  // If the keys or values are trivially relocatable, we do a bulk memcpy of
  // them into place. This will copy them into both possible locations, which
  // is fine. One will be empty, and either clobbered when reused or simply
  // ignored; the other will be the one used. This might seem to require that
  // making two copies is valid, but it doesn't: it produces the exact same
  // storage as copying the entry into the wrong location first, and then
  // again into the correct location. Only one is live and only one is
  // destroyed.
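  //
  // For example (an illustrative sketch; a trivially relocatable entry type
  // and old_size == 4 are assumptions): if slot 2 holds an entry, the two
  // memcpys below leave byte-identical bytes at new indices 2 and 6 (2 | 4).
  // Exactly one of those slots is marked present in the new metadata, so only
  // that copy is ever treated as a live entry; the other is dead bytes that a
  // later insertion is free to clobber.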
  if constexpr (EntryT::IsTriviallyRelocatable) {
    memcpy(new_entries, old_entries, old_size * sizeof(EntryT));
    memcpy(new_entries + old_size, old_entries, old_size * sizeof(EntryT));
  }

  // We then need to do a normal insertion for anything that was probed before
  // growth, but we know we'll find an empty slot, so leverage that.
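  // (An explanatory note, not original source text: both duplicated positions
  // of each probed entry were cleared to empty above, so this insertion
  // probes from the entry's true hash group and, with the table freshly
  // doubled, is guaranteed to reach an empty slot rather than a tombstone.)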
  for (ssize_t old_index : probed_indices) {
    EntryT* new_entry =
        InsertIntoEmpty(old_entries[old_index].key(), key_context);
    new_entry->MoveFrom(std::move(old_entries[old_index]));
  }
  CARBON_DCHECK(count ==
                (new_size - llvm::count(llvm::ArrayRef(new_metadata, new_size),
                                        MetadataGroup::Empty)));
  growth_budget_ -= count;
  CARBON_DCHECK(growth_budget_ ==
                (GrowthThresholdForAllocSize(new_size) -
                 (new_size - llvm::count(llvm::ArrayRef(new_metadata, new_size),
                                         MetadataGroup::Empty))));
  CARBON_DCHECK(growth_budget_ > 0 &&
                "Must still have a growth budget after rehash!");

  if (!old_small) {
    // Old isn't a small buffer, so we need to deallocate it.
    Deallocate(old_storage, old_size);
  }

  // And lastly insert the lookup_key into an index in the newly grown map and
  // return that index for use.
  --growth_budget_;
  return InsertIntoEmpty(lookup_key, key_context);
}

template <typename InputBaseT, ssize_t SmallSize>
TableImpl<InputBaseT, SmallSize>::TableImpl(const TableImpl& arg)
    : BaseT(arg.alloc_size(), arg.growth_budget_, SmallSize) {
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  ssize_t local_size = arg.alloc_size();
  if (SmallSize > 0 && arg.is_small()) {
    CARBON_DCHECK(local_size == SmallSize);
    this->storage() = small_storage();
  } else {
    this->storage() = BaseT::Allocate(local_size);
  }

  // Preserve which slot every entry is in, including tombstones in the
  // metadata, in order to copy into the new table's storage without rehashing
  // all of the keys. This is especially important as we don't have an easy way
  // to access the key context needed for rehashing here.
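  //
  // Concretely (an illustrative example, not original source text): if the
  // source table holds a tombstone that some entry once probed past, turning
  // that tombstone into an empty slot in the copy would cut that entry's
  // probe sequence short and break lookups; copying the metadata
  // byte-for-byte preserves every probe sequence without rehashing.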
  uint8_t* local_metadata = this->metadata();
  EntryT* local_entries = this->entries();
  const uint8_t* local_arg_metadata = arg.metadata();
  const EntryT* local_arg_entries = arg.entries();
  memcpy(local_metadata, local_arg_metadata, local_size);
  for (ssize_t group_index = 0; group_index < local_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(local_arg_metadata, group_index);
    for (ssize_t byte_index : g.MatchPresent()) {
      local_entries[group_index + byte_index].CopyFrom(
          local_arg_entries[group_index + byte_index]);
    }
  }
}

// Puts the incoming table into a moved-from state that can be destroyed or
// re-initialized but must not be used otherwise.
template <typename InputBaseT, ssize_t SmallSize>
TableImpl<InputBaseT, SmallSize>::TableImpl(TableImpl&& arg) noexcept
    : BaseT(arg.alloc_size(), arg.growth_budget_, SmallSize) {
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  ssize_t local_size = arg.alloc_size();
  if (SmallSize > 0 && arg.is_small()) {
    CARBON_DCHECK(local_size == SmallSize);
    this->storage() = small_storage();
    // For small tables, we have to move the entries as we can't move the
    // tables themselves. We do this preserving their slots and even tombstones
    // to avoid rehashing.
    uint8_t* local_metadata = this->metadata();
    EntryT* local_entries = this->entries();
    uint8_t* local_arg_metadata = arg.metadata();
    EntryT* local_arg_entries = arg.entries();
    memcpy(local_metadata, local_arg_metadata, local_size);
    if constexpr (EntryT::IsTriviallyRelocatable) {
      memcpy(local_entries, local_arg_entries, SmallSize * sizeof(EntryT));
    } else {
      for (ssize_t group_index = 0; group_index < local_size;
           group_index += GroupSize) {
        auto g = MetadataGroup::Load(local_arg_metadata, group_index);
        for (ssize_t byte_index : g.MatchPresent()) {
          local_entries[group_index + byte_index].MoveFrom(
              std::move(local_arg_entries[group_index + byte_index]));
        }
      }
    }
  } else {
    // Just point to the allocated storage.
    this->storage() = arg.storage();
  }

  // Finally, put the incoming table into a moved-from state.
  arg.alloc_size() = 0;
  // Replace the pointer with null to ease debugging.
  arg.storage() = nullptr;
}

// Reset a table to its original state, including releasing any allocated
// memory.
template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::ResetImpl() -> void {
  this->Destroy();
  // Re-initialize the whole thing.
  CARBON_DCHECK(this->small_alloc_size() == SmallSize);
  this->Construct(small_storage());
}

template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::small_storage() const -> Storage* {
  if constexpr (SmallSize > 0) {
    // Do a bunch of validation of the small size to establish our invariants
    // when we know we have a non-zero small size.
    static_assert(llvm::isPowerOf2_64(SmallSize),
                  "SmallSize must be a power of two for a hashed buffer!");
    static_assert(
        SmallSize >= MaxGroupSize,
        "We require all small sizes to be multiples of the largest group "
        "size supported to ensure they can be used portably.");
    static_assert(
        (SmallSize % MaxGroupSize) == 0,
        "Small size must be a multiple of the max group size supported "
        "so that we can allocate a whole number of groups.");
    // Implied by the max asserts above.
    static_assert(SmallSize >= GroupSize);
    static_assert((SmallSize % GroupSize) == 0);
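    // As a concrete illustration (GroupSize == MaxGroupSize == 16 is an
    // assumed target configuration, not fixed by this header): SmallSize
    // values of 16, 32, or 64 satisfy the asserts above, while 8 (smaller
    // than MaxGroupSize) and 24 (neither a power of two nor a multiple of 16)
    // would fail to compile.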
    static_assert(SmallSize >= alignof(StorageEntry<KeyT, ValueT>),
                  "Requested a small size that would require padding between "
                  "metadata bytes and correctly aligned key and value types. "
                  "Either a larger small size or a zero small size and heap "
                  "allocation are required for this key and value type.");
    static_assert(offsetof(SmallStorage, entries) == SmallSize,
                  "Offset to entries in small size storage doesn't match "
                  "computed offset!");
    return &small_storage_;
  } else {
    static_assert(
        sizeof(TableImpl) == sizeof(BaseT),
        "Empty small storage caused a size difference and wasted space!");
    return nullptr;
  }
}

}  // namespace Carbon::RawHashtable

#endif  // CARBON_COMMON_RAW_HASHTABLE_H_