// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef CARBON_COMMON_RAW_HASHTABLE_H_
#define CARBON_COMMON_RAW_HASHTABLE_H_

#include <algorithm>
#include <concepts>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <new>
#include <type_traits>
#include <utility>

#include "common/check.h"
#include "common/concepts.h"
#include "common/hashing.h"
#include "common/raw_hashtable_metadata_group.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"

// A namespace collecting a set of low-level utilities for building hashtable
// data structures. These should only be used as implementation details of
// higher-level data-structure APIs.
//
// The utilities here use the `KeyContext` provided by
// `hashtable_key_context.h` to support the necessary hashtable operations on
// keys: hashing and comparison. This also serves as the customization point
// for hashtables built on this infrastructure for those operations. See that
// header file for details.
//
// These utilities support hashtables following a *specific* API design
// pattern, and using Small-Size Optimization, or "SSO", when desired. We
// expect there to be three layers to any hashtable design:
//
// - A *view* type: a read-only view of the hashtable contents. This type
//   should be a value type and is expected to be passed by-value in APIs.
//   However, it will have `const`-reference semantics, much like a
//   `std::string_view`. Note that the *entries* will continue to be mutable;
//   it is only the *table* that is read-only.
//
// - A *base* type: a base class type of the actual hashtable, which allows
//   almost all mutable operations but erases any specific SSO buffer size.
//   Because this is a base of the actual hash table, it is designed to be
//   passed as a non-`const` reference or pointer.
//
// - A *table* type: the actual hashtable which derives from the base type and
//   adds any desired SSO storage buffer. Beyond the physical storage, it also
//   allows resetting the table to its initial state & allocated size, as well
//   as copying and moving the table.
//
// For complete examples of the API design, see `set.h` for a hashtable-based
// set data structure, and `map.h` for a hashtable-based map data structure.
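//
// As an illustrative sketch only -- the names `MyKeyContext`, `MyMapView`,
// `MyMapBase`, and `MyMap` are hypothetical and not part of this header --
// the three layers are expected to be wired together roughly as:
//
//   template <typename K, typename V>
//   class MyMapView : RawHashtable::ViewImpl<K, V, MyKeyContext> { ... };
//
//   template <typename K, typename V>
//   class MyMapBase : protected RawHashtable::BaseImpl<K, V, MyKeyContext> {
//     ...
//   };
//
//   template <typename K, typename V, ssize_t SmallSize>
//   class MyMap : RawHashtable::TableImpl<MyMapBase<K, V>, SmallSize> { ... };
//
// See `map.h` and `set.h` for the real versions of this pattern.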
//
// The hashtable design implemented here has several key invariants and design
// elements that are essential to all three of the types above and the
// functionality they provide.
//
// - The underlying hashtable uses [open addressing], a power-of-two table
//   size, and quadratic probing rather than closed addressing and chaining.
//
//   [open addressing]: https://en.wikipedia.org/wiki/Open_addressing
//
// - Each _slot_ in the table corresponds to a key, a value, and one byte of
//   metadata. Each _entry_ is a key and value. The key and value for an entry
//   are stored together.
//
// - The allocated storage is organized into an array of metadata bytes
//   followed by an array of entry storage.
//
// - The metadata byte corresponding to each entry marks whether that entry is
//   empty, deleted, or present. When present, it also stores a 7-bit tag taken
//   from the hash of the entry's key.
//
// - The storage for an entry is an internal type that should not be exposed to
//   users; only the underlying keys and values should be.
//
// - The hash addressing and probing occurs over *groups* of slots rather than
//   individual entries. When inserting a new entry, it can be added to the
//   group it hashes to as long as that group is not full, and it can even
//   replace a slot holding a tombstone from a previously deleted entry. Only
//   when the group is full will we look at the next group in the probe
//   sequence. As a result, there may be entries in a group where a different
//   group is the start of that entry's probe sequence. Also, when performing a
//   lookup, every group in the probe sequence must be inspected for the lookup
//   key until it is found or a group has an empty slot.
//
// - Groups are scanned rapidly using the one-byte metadata for each entry in
//   the group and CPU instructions that allow comparing all of the metadata
//   for a group in parallel. For more details on the metadata group encoding
//   and scanning, see `raw_hashtable_metadata_group.h`.
//
// - `GroupSize` is a platform-specific, relatively small power of two that
//   fits in some hardware register. However, `MaxGroupSize` is provided as a
//   portable max that is also a power of two. The table storage, whether
//   provided by an SSO buffer or allocated, is required to be a multiple of
//   `MaxGroupSize` to keep the requirement portable but sufficient for all
//   platforms.
//
// - There is *always* an allocated table of some multiple of `MaxGroupSize`.
//   This allows accesses to be branchless. When heap allocated, we proactively
//   allocate at least a minimum heap size table. When there is a small-size
//   optimization (SSO) buffer, that provides the initial allocation.
//
// - The table performs a minimal amount of bookkeeping that limits the APIs it
//   can support:
//   - `alloc_size` is the size of the table *allocated* (not *used*), and is
//     always a power of 2 at least as big as `MinAllocatedSize`.
//   - `storage` is a pointer to the storage for the `alloc_size` slots of the
//     table, and never null.
//   - `small_alloc_size` is the maximum `alloc_size` where the table is stored
//     in the object itself instead of separately on the heap. In this case,
//     `storage` points to `small_storage_`.
//   - `growth_budget` is the number of entries that may be added before the
//     table allocation is doubled. It is always
//     `GrowthThresholdForAllocSize(alloc_size)` minus the number of non-empty
//     (filled or deleted) slots. If it ever falls to 0, the table is grown to
//     keep it greater than 0.
//   There is also a "moved-from" state, in which `alloc_size` is 0 and
//   `storage` is null; a moved-from table may only be reinitialized or
//   destroyed. Since the table doesn't track the exact number of filled
//   entries, it doesn't support a container-style `size` API.
//
// - There is no direct iterator support because of the complexity of embedding
//   the group-based metadata scanning into an iterator model. Instead, there
//   is just a for-each method that is passed a lambda to observe all entries.
//   The order of this observation is also not guaranteed.
namespace Carbon::RawHashtable {

// Which prefetch strategies are enabled can be controlled via macros to
// support experiments.
//
// Currently, benchmarking on both modern AMD and ARM CPUs seems to indicate
// that entry group prefetching is more beneficial than metadata prefetching,
// but that the benefit degrades when both are enabled. This determined our
// current default: no metadata prefetch, but entry group prefetch enabled.
//
// Override these by defining them as part of the build explicitly to either
// `0` or `1`. If left undefined, the defaults will be supplied.
#ifndef CARBON_ENABLE_PREFETCH_METADATA
#define CARBON_ENABLE_PREFETCH_METADATA 0
#endif
#ifndef CARBON_ENABLE_PREFETCH_ENTRY_GROUP
#define CARBON_ENABLE_PREFETCH_ENTRY_GROUP 1
#endif

// If allocating storage, allocate a minimum of one cacheline of group metadata
// or a minimum of one group, whichever is larger.
constexpr ssize_t MinAllocatedSize = std::max<ssize_t>(64, MaxGroupSize);

// An entry in the hashtable storage of a `KeyT` and `ValueT` object.
//
// Allows manual construction, destruction, and access to these values so we
// can create arrays of the entries prior to populating them with actual keys
// and values.
template <typename KeyT, typename ValueT>
struct StorageEntry {
  static constexpr bool IsTriviallyDestructible =
      std::is_trivially_destructible_v<KeyT> &&
      std::is_trivially_destructible_v<ValueT>;

  static constexpr bool IsTriviallyRelocatable =
      IsTriviallyDestructible && std::is_trivially_move_constructible_v<KeyT> &&
      std::is_trivially_move_constructible_v<ValueT>;

  static constexpr bool IsCopyable =
      IsTriviallyRelocatable || (std::is_copy_constructible_v<KeyT> &&
                                 std::is_copy_constructible_v<ValueT>);

  auto key() const -> const KeyT& {
    // Ensure we don't need more alignment than available. Inside a method body
    // to apply to the complete type.
    static_assert(
        alignof(StorageEntry) <= MinAllocatedSize,
        "The minimum allocated size turns into the alignment of our array of "
        "storage entries as they follow the metadata byte array.");
    return *std::launder(reinterpret_cast<const KeyT*>(&key_storage));
  }
  auto key() -> KeyT& {
    return const_cast<KeyT&>(const_cast<const StorageEntry*>(this)->key());
  }

  auto value() const -> const ValueT& {
    return *std::launder(reinterpret_cast<const ValueT*>(&value_storage));
  }
  auto value() -> ValueT& {
    return const_cast<ValueT&>(const_cast<const StorageEntry*>(this)->value());
  }

  // We handle destruction and move manually as we only want to expose distinct
  // `KeyT` and `ValueT` subobjects to user code that may need to do in-place
  // construction. As a consequence, this struct only provides the storage and
  // we have to manually manage the construction, move, and destruction of the
  // objects.
  auto Destroy() -> void {
    static_assert(!IsTriviallyDestructible,
                  "Should never instantiate when trivial!");
    key().~KeyT();
    value().~ValueT();
  }

  auto CopyFrom(const StorageEntry& entry) -> void
    requires(IsCopyable)
  {
    if constexpr (IsTriviallyRelocatable) {
      memcpy(this, &entry, sizeof(StorageEntry));
    } else {
      new (&key_storage) KeyT(entry.key());
      new (&value_storage) ValueT(entry.value());
    }
  }

  // Move from an expiring entry and destroy that entry's key and value.
  // Optimizes to directly use `memcpy` when correct.
  auto MoveFrom(StorageEntry&& entry) -> void {
    if constexpr (IsTriviallyRelocatable) {
      memcpy(this, &entry, sizeof(StorageEntry));
    } else {
      new (&key_storage) KeyT(std::move(entry.key()));
      entry.key().~KeyT();
      new (&value_storage) ValueT(std::move(entry.value()));
      entry.value().~ValueT();
    }
  }

  alignas(KeyT) std::byte key_storage[sizeof(KeyT)];
  alignas(ValueT) std::byte value_storage[sizeof(ValueT)];
};
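
// A minimal usage sketch (not part of this header): given an entry pointer for
// a freshly inserted slot, a hypothetical map implementation would construct
// the key and value in place in the raw storage and later read them back
// through the typed accessors, e.g.:
//
//   StorageEntry<std::string, int>* entry = ...;  // freshly inserted slot
//   new (&entry->key_storage) std::string(my_key);
//   new (&entry->value_storage) int(42);
//   ...
//   entry->value() += 1;  // access via the laundered typed accessors
//
// Destruction is likewise manual, via `entry->Destroy()`, when the key/value
// types are not trivially destructible.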

// A specialization of the storage entry for sets without a distinct value
// type. Somewhat duplicative with the key-value version, but C++
// specialization makes doing better difficult.
template <typename KeyT>
struct StorageEntry<KeyT, void> {
  static constexpr bool IsTriviallyDestructible =
      std::is_trivially_destructible_v<KeyT>;

  static constexpr bool IsTriviallyRelocatable =
      IsTriviallyDestructible && std::is_trivially_move_constructible_v<KeyT>;

  static constexpr bool IsCopyable =
      IsTriviallyRelocatable || std::is_copy_constructible_v<KeyT>;

  auto key() const -> const KeyT& {
    // Ensure we don't need more alignment than available.
    static_assert(
        alignof(StorageEntry) <= MinAllocatedSize,
        "The minimum allocated size turns into the alignment of our array of "
        "storage entries as they follow the metadata byte array.");
    return *std::launder(reinterpret_cast<const KeyT*>(&key_storage));
  }
  auto key() -> KeyT& {
    return const_cast<KeyT&>(const_cast<const StorageEntry*>(this)->key());
  }

  auto Destroy() -> void {
    static_assert(!IsTriviallyDestructible,
                  "Should never instantiate when trivial!");
    key().~KeyT();
  }

  auto CopyFrom(const StorageEntry& entry) -> void
    requires(IsCopyable)
  {
    if constexpr (IsTriviallyRelocatable) {
      memcpy(this, &entry, sizeof(StorageEntry));
    } else {
      new (&key_storage) KeyT(entry.key());
    }
  }

  auto MoveFrom(StorageEntry&& entry) -> void {
    if constexpr (IsTriviallyRelocatable) {
      memcpy(this, &entry, sizeof(StorageEntry));
    } else {
      new (&key_storage) KeyT(std::move(entry.key()));
      entry.key().~KeyT();
    }
  }

  alignas(KeyT) std::byte key_storage[sizeof(KeyT)];
};

struct Metrics {
  // How many keys are present in the table.
  ssize_t key_count = 0;
  // How many slots of the table are reserved due to deleted markers required
  // to preserve probe sequences.
  ssize_t deleted_count = 0;
  // How many bytes of allocated storage are used by the table. Note, this does
  // not include the table object or any small-size buffer.
  ssize_t storage_bytes = 0;

  // How many keys have required probing beyond the initial group. These are
  // the keys with a probe distance > 0.
  ssize_t probed_key_count = 0;

  // The probe distance averaged over every key. If every key is in its initial
  // group, this will be zero as no keys will have a larger probe distance. In
  // general, we want this to be as close to zero as possible.
  double probe_avg_distance = 0.0;
  // The maximum probe distance found for a single key in the table.
  ssize_t probe_max_distance = 0;

  // The average number of probing comparisons required to locate a specific
  // key in the table. This is how many comparisons are required *before* the
  // key is located -- the *failed* comparisons. We always have to do one
  // successful comparison at the end. That successful comparison isn't counted
  // because this focuses the metric on the overhead the table is introducing,
  // and keeps a "perfect" table at an average of `0.0` here, matching the
  // perfect probe-distance average of `0.0`.
  double probe_avg_compares = 0.0;
  // The maximum number of probing comparisons required to locate a specific
  // key in the table.
  ssize_t probe_max_compares = 0;
};

// A placeholder empty type used to model pointers to the allocated buffer of
// storage.
//
// The allocated storage doesn't have a meaningful static layout -- it consists
// of an array of metadata groups followed by an array of storage entries.
// However, we want to be able to mark pointers to this storage, and so we use
// pointers to this placeholder type as that signifier.
//
// This is a complete, empty type so that it can be used as a base class of a
// specific concrete storage type for compile-time sized storage.
struct Storage {};

// Forward declaration to support friending, see the definition below.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
class BaseImpl;

// Implementation helper for defining a read-only view type for a hashtable.
//
// A specific user-facing hashtable view type should derive privately from this
// type, and forward the implementation of its interface to functions in this
// type.
//
// The methods available to user-facing hashtable types are `protected`, and
// where they are expected to directly map to a public API, they are named with
// an `Impl` suffix. The suffix naming ensures types don't `using` in these
// low-level APIs but declare their own and implement them by forwarding to
// these APIs. We don't want users to have to read these implementation details
// to understand their container's API, so none of these methods should be
// `using`-ed into the user-facing types.
//
// Some of the types are just convenience aliases that aren't important to
// surface as part of the user-facing type API for readers, and so those are
// reasonable to add via a `using`.
//
// Some methods are used by other parts of the raw hashtable implementation.
// Those are kept `private`, and where necessary the other components of the
// raw hashtable implementation are friended to give access to them.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
class ViewImpl {
 protected:
  using KeyT = InputKeyT;
  using ValueT = InputValueT;
  using KeyContextT = InputKeyContextT;
  using EntryT = StorageEntry<KeyT, ValueT>;
  using MetricsT = Metrics;

  friend class BaseImpl<KeyT, ValueT, KeyContextT>;
  template <typename InputBaseT, ssize_t SmallSize>
  friend class TableImpl;

  // Make more-`const` types friends to enable conversions that add `const`.
  friend class ViewImpl<const KeyT, ValueT, KeyContextT>;
  friend class ViewImpl<KeyT, const ValueT, KeyContextT>;
  friend class ViewImpl<const KeyT, const ValueT, KeyContextT>;

  ViewImpl() = default;

  // Support adding `const` to either key or value type of some other view.
  template <typename OtherKeyT, typename OtherValueT>
  // NOLINTNEXTLINE(google-explicit-constructor)
  ViewImpl(ViewImpl<OtherKeyT, OtherValueT, KeyContextT> other_view)
    requires(SameAsOneOf<KeyT, OtherKeyT, const OtherKeyT> &&
             SameAsOneOf<ValueT, OtherValueT, const OtherValueT>)
      : alloc_size_(other_view.alloc_size_), storage_(other_view.storage_) {}

  // Looks up an entry in the hashtable and returns its address or null if not
  // present.
  template <typename LookupKeyT>
  auto LookupEntry(LookupKeyT lookup_key, KeyContextT key_context) const
      -> EntryT*;

  // Calls `entry_callback` for each entry in the hashtable. All the entries
  // within a specific group are visited first, and then `group_callback` is
  // called on the group itself. The `group_callback` is typically only used by
  // the internals of the hashtable.
  template <typename EntryCallbackT, typename GroupCallbackT>
  auto ForEachEntry(EntryCallbackT entry_callback,
                    GroupCallbackT group_callback) const -> void;

  // Returns a collection of informative metrics on the current state of the
  // table, useful for performance analysis. These include relatively
  // slow-to-compute metrics requiring deep inspection of the table's state.
  auto ComputeMetricsImpl(KeyContextT key_context) const -> MetricsT;

 private:
  ViewImpl(ssize_t alloc_size, Storage* storage)
      : alloc_size_(alloc_size), storage_(storage) {}

  // Computes the offset from the metadata array to the entries array for a
  // given size. This is trivial, but we use this routine to enforce invariants
  // on the sizes.
  static constexpr auto EntriesOffset(ssize_t alloc_size) -> ssize_t {
    CARBON_DCHECK(llvm::isPowerOf2_64(alloc_size),
                  "Size must be a power of two for a hashed buffer!");
    // The size is always a power of two. We prevent any too-small sizes so it
    // being a power of two provides the needed alignment. As a result, the
    // offset is exactly the size. We validate this here to catch alignment
    // bugs early.
    CARBON_DCHECK(static_cast<uint64_t>(alloc_size) ==
                  llvm::alignTo<alignof(EntryT)>(alloc_size));
    return alloc_size;
  }

  // Compute the allocated table's byte size.
  static constexpr auto AllocByteSize(ssize_t alloc_size) -> ssize_t {
    return EntriesOffset(alloc_size) + sizeof(EntryT) * alloc_size;
  }
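
  // As a worked example of the layout these two functions describe (the
  // numbers are illustrative, not tied to any particular platform): with
  // `alloc_size == 64` and `sizeof(EntryT) == 16`, the allocation holds the 64
  // metadata bytes in bytes [0, 64), followed by the 64 entries in bytes
  // [64, 64 + 64 * 16), so `EntriesOffset(64) == 64` and
  // `AllocByteSize(64) == 1088`. The entries array starting at offset
  // `alloc_size` is why the size must be large enough to provide the entry
  // alignment.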

  auto metadata() const -> uint8_t* {
    return reinterpret_cast<uint8_t*>(storage_);
  }
  auto entries() const -> EntryT* {
    return reinterpret_cast<EntryT*>(reinterpret_cast<std::byte*>(storage_) +
                                     EntriesOffset(alloc_size_));
  }

  // Prefetch the metadata prior to probing. This is to overlap any of the
  // memory access latency we can with the hashing of a key or other
  // latency-bound operation prior to probing.
  auto PrefetchMetadata() const -> void {
    if constexpr (CARBON_ENABLE_PREFETCH_METADATA) {
      // Prefetch with a "low" temporal locality as we're primarily expecting a
      // brief use of the metadata and then to return to application code.
      __builtin_prefetch(metadata(), /*read*/ 0, /*low-locality*/ 1);
    }
  }

  // Prefetch an entry. This prefetches for read as it is primarily expected to
  // be used in the probing path, and writing afterwards isn't especially
  // slowed down. We don't want to synthesize writes unless we *know* we're
  // going to write.
  static auto PrefetchEntryGroup(const EntryT* entry_group) -> void {
    if constexpr (CARBON_ENABLE_PREFETCH_ENTRY_GROUP) {
      // Prefetch with a "low" temporal locality as we're primarily expecting a
      // brief use of the entries and then to return to application code.
      __builtin_prefetch(entry_group, /*read*/ 0, /*low-locality*/ 1);
    }
  }

  ssize_t alloc_size_;
  Storage* storage_;
};

// Implementation helper for defining a read-write base type for a hashtable
// that type-erases any SSO buffer.
//
// A specific user-facing hashtable base type should derive using *`protected`*
// inheritance from this type, and forward the implementation of its interface
// to functions in this type.
//
// Other than the use of `protected` inheritance, the patterns for this type,
// and how to build user-facing hashtable base types from it, mirror those of
// `ViewImpl`. See its documentation for more details.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
class BaseImpl {
 protected:
  using KeyT = InputKeyT;
  using ValueT = InputValueT;
  using KeyContextT = InputKeyContextT;
  using ViewImplT = ViewImpl<KeyT, ValueT, KeyContextT>;
  using EntryT = typename ViewImplT::EntryT;
  using MetricsT = typename ViewImplT::MetricsT;

  BaseImpl(int small_alloc_size, Storage* small_storage)
      : small_alloc_size_(small_alloc_size) {
    CARBON_CHECK(small_alloc_size >= 0);
    Construct(small_storage);
  }
  // Only used for copying and moving, and leaves storage uninitialized.
  BaseImpl(ssize_t alloc_size, int growth_budget, int small_alloc_size)
      : view_impl_(alloc_size, nullptr),
        growth_budget_(growth_budget),
        small_alloc_size_(small_alloc_size) {}

  // Destruction must be handled by the table where it can destroy entries in
  // any small buffer, so make the base destructor protected but defaulted
  // here.
  ~BaseImpl() = default;

  // NOLINTNEXTLINE(google-explicit-constructor): Designed to implicitly decay.
  operator ViewImplT() const { return view_impl(); }

  auto view_impl() const -> ViewImplT { return view_impl_; }

  // Looks up the provided key in the hashtable. If found, returns a pointer to
  // that entry and `false`.
  //
  // If not found, will locate an empty entry for inserting into, set the
  // metadata for that entry, and return a pointer to the entry and `true`.
  // When necessary, this will grow the hashtable to cause there to be
  // sufficient empty entries.
  template <typename LookupKeyT>
  auto InsertImpl(LookupKeyT lookup_key, KeyContextT key_context)
      -> std::pair<EntryT*, bool>;

  // Grow the table to a specific allocation size.
  //
  // This will grow the table if necessary for it to have an allocation size of
  // `target_alloc_size`, which must be a power of two. Note that this will not
  // allow that many keys to be inserted into the hashtable, but a smaller
  // number based on the load factor. If a specific number of insertions needs
  // to be achieved without triggering growth, use the `GrowForInsertCountImpl`
  // method.
  auto GrowToAllocSizeImpl(ssize_t target_alloc_size, KeyContextT key_context)
      -> void;

  // Grow the table to allow inserting the specified number of keys.
  auto GrowForInsertCountImpl(ssize_t count, KeyContextT key_context) -> void;

  // Looks up the entry in the hashtable, and if found destroys the entry and
  // returns `true`. If not found, returns `false`.
  //
  // Does not release any memory, just leaves a tombstone behind so this entry
  // cannot be found and the slot can in theory be reused.
  template <typename LookupKeyT>
  auto EraseImpl(LookupKeyT lookup_key, KeyContextT key_context) -> bool;

  // Erases all entries in the hashtable but leaves the allocated storage.
  auto ClearImpl() -> void;

 private:
  template <typename InputBaseT, ssize_t SmallSize>
  friend class TableImpl;

  static constexpr ssize_t Alignment = std::max<ssize_t>(
      alignof(MetadataGroup), alignof(StorageEntry<KeyT, ValueT>));

  // Implementation of inline small storage for the provided key type, value
  // type, and small size. Specialized for a zero small size to be an empty
  // struct.
  template <ssize_t SmallSize>
  struct SmallStorage : Storage {
    alignas(Alignment) uint8_t metadata[SmallSize];
    mutable StorageEntry<KeyT, ValueT> entries[SmallSize];
  };
  // Specialized storage with no inline buffer to avoid any extra alignment.
  template <>
  struct SmallStorage<0> {};

  static auto Allocate(ssize_t alloc_size) -> Storage*;
  static auto Deallocate(Storage* storage, ssize_t alloc_size) -> void;

  auto growth_budget() const -> ssize_t { return growth_budget_; }
  auto alloc_size() const -> ssize_t { return view_impl_.alloc_size_; }
  auto alloc_size() -> ssize_t& { return view_impl_.alloc_size_; }
  auto storage() const -> Storage* { return view_impl_.storage_; }
  auto storage() -> Storage*& { return view_impl_.storage_; }
  auto metadata() const -> uint8_t* { return view_impl_.metadata(); }
  auto entries() const -> EntryT* { return view_impl_.entries(); }
  auto small_alloc_size() const -> ssize_t {
    return static_cast<unsigned>(small_alloc_size_);
  }
  auto is_small() const -> bool {
    CARBON_DCHECK(alloc_size() >= small_alloc_size());
    return alloc_size() == small_alloc_size();
  }

  // Wrapper to call `ViewImplT::PrefetchMetadata`, see that method for
  // details.
  auto PrefetchStorage() const -> void { view_impl_.PrefetchMetadata(); }

  auto Construct(Storage* small_storage) -> void;
  auto Destroy() -> void;
  auto CopySlotsFrom(const BaseImpl& arg) -> void
    requires(EntryT::IsCopyable);
  auto MoveFrom(BaseImpl&& arg, Storage* small_storage) -> void;

  auto InsertIntoEmpty(HashCode hash) -> EntryT*;

  static auto ComputeNextAllocSize(ssize_t old_alloc_size) -> ssize_t;
  static auto GrowthThresholdForAllocSize(ssize_t alloc_size) -> ssize_t;

  auto GrowToNextAllocSize(KeyContextT key_context) -> void;
  auto GrowAndInsert(HashCode hash, KeyContextT key_context) -> EntryT*;

  ViewImplT view_impl_;
  int growth_budget_;
  int small_alloc_size_;
};

// Implementation helper for defining a hashtable type with an SSO buffer.
//
// A specific user-facing hashtable should derive privately from this type, and
// forward the implementation of its interface to functions in this type. It
// should provide the corresponding user-facing hashtable base type as the
// `InputBaseT` type parameter (rather than a key/value pair), and this type
// will in turn derive from that provided base type. This allows
// derived-to-base conversion from the user-facing hashtable type to the
// user-facing hashtable base type. And it does so keeping the inheritance
// linear. The resulting linear inheritance hierarchy for a `Map<K, T>` type
// will look like:
//
//   Map<K, T>
//     ↓
//   TableImpl<MapBase<K, T>>
//     ↓
//   MapBase<K, T>
//     ↓
//   BaseImpl<K, T>
//
// Other than this inheritance technique, the patterns for this type, and how
// to build user-facing hashtable types from it, mirror those of `ViewImpl`.
// See its documentation for more details.
template <typename InputBaseT, ssize_t SmallSize>
class TableImpl : public InputBaseT {
 protected:
  using BaseT = InputBaseT;

  TableImpl() : BaseT(SmallSize, small_storage()) {}
  TableImpl(const TableImpl& arg)
    requires(BaseT::EntryT::IsCopyable);
  TableImpl(TableImpl&& arg) noexcept;
  auto operator=(const TableImpl& arg) -> TableImpl&
    requires(BaseT::EntryT::IsCopyable);
  auto operator=(TableImpl&& arg) noexcept -> TableImpl&;
  ~TableImpl();

  // Resets the hashtable to its initial state, clearing all entries and
  // releasing all memory. If the hashtable had an SSO buffer, that is restored
  // as the storage. Otherwise, a minimum sized table storage is allocated.
  auto ResetImpl() -> void;

 private:
  using KeyT = BaseT::KeyT;
  using ValueT = BaseT::ValueT;
  using EntryT = BaseT::EntryT;
  using SmallStorage = BaseT::template SmallStorage<SmallSize>;

  auto small_storage() const -> Storage*;

  auto SetUpStorage() -> void;

  [[no_unique_address]] mutable SmallStorage small_storage_;
};

////////////////////////////////////////////////////////////////////////////////
//
// Only implementation details below this point.
//
////////////////////////////////////////////////////////////////////////////////

// Computes a seed that provides a small amount of entropy from ASLR where
// available with minimal cost. The priority is speed, and this computes the
// entropy in a way that doesn't require loading from memory -- it merely uses
// an address that is already available to the code.
inline auto ComputeSeed() -> uint64_t {
  // A global variable whose address is used as a seed. This allows ASLR to
  // introduce some variation in hashtable ordering when enabled via the code
  // model for globals.
  extern volatile std::byte global_addr_seed;

  return reinterpret_cast<uint64_t>(&global_addr_seed);
}

inline auto ComputeProbeMaskFromSize(ssize_t size) -> size_t {
  CARBON_DCHECK(llvm::isPowerOf2_64(size),
                "Size must be a power of two for a hashed buffer!");
  // Since `size` is a power of two, we can make sure the probes are less than
  // `size` by making the mask `size - 1`. We also mask off the low bits so the
  // probes are a multiple of the size of the groups of entries.
  return (size - 1) & ~GroupMask;
}

// This class handles building a sequence of probe indices from a given
// starting point, including both the quadratic growth and masking the index to
// stay within the bucket array size. The starting point doesn't need to be
// clamped to the size ahead of time (or even be positive); we do that
// internally.
//
// For reference on quadratic probing:
// https://en.wikipedia.org/wiki/Quadratic_probing
//
// We compute the quadratic probe index incrementally, but we can also compute
// it mathematically and will check that the incremental result matches our
// mathematical expectation. We use the quadratic probing formula of:
//
//   p(start, step) = (start + (step + step^2) / 2)  (mod size / GroupSize)
//
// However, we compute it incrementally and scale all the variables by the
// group size so it can be used as an index without an additional
// multiplication.
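//
// As a concrete worked example (assuming, purely for illustration, a
// `GroupSize` of 16 and a table with `size == 64`, i.e. 4 groups): the scaled
// formula yields group indices `start + 16 * (step + step^2) / 2 (mod 64)`.
// Starting at index 0, the sequence of group indices visited is 0, 16, 48, 32
// -- every group exactly once in the first `size / GroupSize` steps, which is
// the coverage property the quadratic probe relies on for power-of-two sizes.
// The mask from `ComputeProbeMaskFromSize(64)` is `63 & ~GroupMask`, keeping
// only group-aligned indices.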
class ProbeSequence {
 public:
  ProbeSequence(ssize_t start, ssize_t size) {
    mask_ = ComputeProbeMaskFromSize(size);
    p_ = start & mask_;
#ifndef NDEBUG
    start_ = start & mask_;
    size_ = size;
#endif
  }

  auto Next() -> void {
    step_ += GroupSize;
    p_ = (p_ + step_) & mask_;
#ifndef NDEBUG
    // Verify against the quadratic formula we expect to be following by
    // scaling everything down by `GroupSize`.
    CARBON_DCHECK(
        (p_ / GroupSize) ==
            ((start_ / GroupSize +
              (step_ / GroupSize + (step_ / GroupSize) * (step_ / GroupSize)) /
                  2) %
             (size_ / GroupSize)),
        "Index in probe sequence does not match the expected formula.");
    CARBON_DCHECK(step_ < size_,
                  "We necessarily visit all groups, so we can't have more "
                  "probe steps than groups.");
#endif
  }

  auto index() const -> ssize_t { return p_; }

 private:
  ssize_t step_ = 0;
  size_t mask_;
  ssize_t p_;
#ifndef NDEBUG
  ssize_t start_;
  ssize_t size_;
#endif
};

// TODO: Evaluate keeping this outlined to see if macro benchmarks observe the
// same perf hit as micro benchmarks.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::LookupEntry(
    LookupKeyT lookup_key, KeyContextT key_context) const -> EntryT* {
  PrefetchMetadata();

  ssize_t local_size = alloc_size_;
  CARBON_DCHECK(local_size > 0);
  uint8_t* local_metadata = metadata();

  HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
  auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();

  EntryT* local_entries = entries();

  // Walk through groups of entries using a quadratic probe starting from
  // `hash_index`.
  ProbeSequence s(hash_index, local_size);
  do {
    ssize_t group_index = s.index();

    // Load the group's metadata and prefetch the entries for this group. The
    // prefetch here helps hide key access latency while we're matching the
    // metadata.
    MetadataGroup g = MetadataGroup::Load(local_metadata, group_index);
    EntryT* group_entries = &local_entries[group_index];
    PrefetchEntryGroup(group_entries);

    // For each group, match the tag against the metadata to extract the
    // potentially matching entries within the group.
    auto metadata_matched_range = g.Match(tag);
    if (LLVM_LIKELY(metadata_matched_range)) {
      // If any entries in this group potentially match based on their
      // metadata, walk each candidate and compare its key to see if we have
      // definitively found a match.
      auto byte_it = metadata_matched_range.begin();
      auto byte_end = metadata_matched_range.end();
      do {
        EntryT* entry = byte_it.index_ptr(group_entries);
        if (LLVM_LIKELY(key_context.KeyEq(lookup_key, entry->key()))) {
          __builtin_assume(entry != nullptr);
          return entry;
        }
        ++byte_it;
      } while (LLVM_UNLIKELY(byte_it != byte_end));
    }

    // We failed to find a matching entry in this bucket, so check if there are
    // empty slots as that indicates we're done probing -- no later probed
    // index could have a match.
    auto empty_byte_matched_range = g.MatchEmpty();
    if (LLVM_LIKELY(empty_byte_matched_range)) {
      return nullptr;
    }

    s.Next();

    // We use a weird construct of an "unlikely" condition of `true`. The goal
    // is to get the compiler to not prioritize the back edge of the loop for
    // code layout, and in at least some tests this seems to be an effective
    // construct for achieving this.
  } while (LLVM_UNLIKELY(true));
}

// Note that we force inlining here because we expect to be called with lambdas
// that will in turn be inlined to form the loop body. We don't want function
// boundaries within the loop for performance, and the degree of simplification
// from inlining these callbacks may be difficult for the compiler to recognize
// automatically.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename EntryCallbackT, typename GroupCallbackT>
[[clang::always_inline]] auto
ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::ForEachEntry(
    EntryCallbackT entry_callback, GroupCallbackT group_callback) const
    -> void {
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();

  ssize_t local_size = alloc_size_;
  for (ssize_t group_index = 0; group_index < local_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(local_metadata, group_index);
    auto present_matched_range = g.MatchPresent();
    if (!present_matched_range) {
      continue;
    }
    for (ssize_t byte_index : present_matched_range) {
      entry_callback(local_entries[group_index + byte_index]);
    }

    group_callback(&local_metadata[group_index]);
  }
}

template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::ComputeMetricsImpl(
    KeyContextT key_context) const -> Metrics {
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();
  ssize_t local_size = alloc_size_;

  Metrics metrics;

  // Compute the ones we can directly.
  metrics.deleted_count = llvm::count(
      llvm::ArrayRef(local_metadata, local_size), MetadataGroup::Deleted);
  metrics.storage_bytes = AllocByteSize(local_size);

  // We want to process present slots specially to collect metrics on their
  // probing behavior.
  for (ssize_t group_index = 0; group_index < local_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(local_metadata, group_index);
    auto present_matched_range = g.MatchPresent();
    for (ssize_t byte_index : present_matched_range) {
      ++metrics.key_count;
      ssize_t index = group_index + byte_index;
      HashCode hash =
          key_context.HashKey(local_entries[index].key(), ComputeSeed());
      auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();
      ProbeSequence s(hash_index, local_size);
      metrics.probed_key_count +=
          static_cast<ssize_t>(s.index() != group_index);

      // For each probed key, go through the probe sequence to find both the
      // probe distance and how many comparisons are required.
      ssize_t distance = 0;
      ssize_t compares = 0;
      for (; s.index() != group_index; s.Next()) {
        auto probe_g = MetadataGroup::Load(local_metadata, s.index());
        auto probe_matched_range = probe_g.Match(tag);
        compares += std::distance(probe_matched_range.begin(),
                                  probe_matched_range.end());
        distance += 1;
      }
      auto probe_g = MetadataGroup::Load(local_metadata, s.index());
      auto probe_matched_range = probe_g.Match(tag);
      CARBON_CHECK(!probe_matched_range.empty());
      for (ssize_t match_index : probe_matched_range) {
        if (match_index >= byte_index) {
          // Note we only count the compares that will *fail* as part of
          // probing. The last successful compare isn't interesting; it is
          // always needed.
          break;
        }
        compares += 1;
      }
      metrics.probe_avg_distance += distance;
      metrics.probe_max_distance =
          std::max(metrics.probe_max_distance, distance);
      metrics.probe_avg_compares += compares;
      metrics.probe_max_compares =
          std::max(metrics.probe_max_compares, compares);
    }
  }
  if (metrics.key_count > 0) {
    metrics.probe_avg_compares /= metrics.key_count;
    metrics.probe_avg_distance /= metrics.key_count;
  }

  return metrics;
}

// TODO: Evaluate whether it is worth forcing this out-of-line given the
// reasonable ABI boundary it forms and large volume of code necessary to
// implement it.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::InsertImpl(
    LookupKeyT lookup_key, KeyContextT key_context)
    -> std::pair<EntryT*, bool> {
  CARBON_DCHECK(alloc_size() > 0);

  PrefetchStorage();

  uint8_t* local_metadata = metadata();

  HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
  auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();

  // We re-purpose the empty control byte to signal no insert is needed to the
  // caller. This is guaranteed to not be a control byte we're inserting.
  // constexpr uint8_t NoInsertNeeded = Group::Empty;

  ssize_t group_with_deleted_index;
  MetadataGroup::MatchIndex deleted_match = {};

  EntryT* local_entries = entries();

  auto return_insert_at_index =
      [&](ssize_t index) -> std::pair<EntryT*, bool> {
    // We'll need to insert at this index so set the control group byte to the
    // proper value.
    local_metadata[index] = tag | MetadataGroup::PresentMask;
    return {&local_entries[index], true};
  };

  for (ProbeSequence s(hash_index, alloc_size());; s.Next()) {
    ssize_t group_index = s.index();

    // Load the group's metadata and prefetch the entries for this group. The
    // prefetch here helps hide key access latency while we're matching the
    // metadata.
    auto g = MetadataGroup::Load(local_metadata, group_index);
    EntryT* group_entries = &local_entries[group_index];
    ViewImplT::PrefetchEntryGroup(group_entries);

    auto control_byte_matched_range = g.Match(tag);
    if (control_byte_matched_range) {
      auto byte_it = control_byte_matched_range.begin();
      auto byte_end = control_byte_matched_range.end();
      do {
        EntryT* entry = byte_it.index_ptr(group_entries);
        if (LLVM_LIKELY(key_context.KeyEq(lookup_key, entry->key()))) {
          return {entry, false};
        }
        ++byte_it;
      } while (LLVM_UNLIKELY(byte_it != byte_end));
    }

    // Track the first group with a deleted entry that we could insert over.
    if (!deleted_match) {
      deleted_match = g.MatchDeleted();
      group_with_deleted_index = group_index;
    }

    // We failed to find a matching entry in this bucket, so check if there are
    // no empty slots. In that case, we'll continue probing.
    auto empty_match = g.MatchEmpty();
    if (!empty_match) {
      continue;
    }

    // Ok, we've finished probing without finding anything and need to insert
    // instead.

    // If we found a deleted slot, we don't need the probe sequence to insert
    // so just bail. We want to ensure building up a table is fast so we
    // de-prioritize this a bit. In practice this doesn't have too much of an
    // effect.
    if (LLVM_UNLIKELY(deleted_match)) {
      return return_insert_at_index(group_with_deleted_index +
                                    deleted_match.index());
    }

    // We're going to need to grow by inserting into an empty slot. Check that
    // we have the budget for that before we compute the exact index of the
    // empty slot. Without the growth budget we'll have to completely rehash
    // and so we can just bail here.
    if (LLVM_UNLIKELY(growth_budget_ == 0)) {
      return {GrowAndInsert(hash, key_context), true};
    }

    --growth_budget_;
    CARBON_DCHECK(growth_budget() >= 0,
                  "Growth budget shouldn't have gone negative!");
    return return_insert_at_index(group_index + empty_match.index());
  }

  CARBON_FATAL(
      "We should never finish probing without finding the entry or an empty "
      "slot.");
}

template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
[[clang::noinline]] auto
BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::GrowToAllocSizeImpl(
    ssize_t target_alloc_size, KeyContextT key_context) -> void {
  CARBON_CHECK(llvm::isPowerOf2_64(target_alloc_size));
  if (target_alloc_size <= alloc_size()) {
    return;
  }

  // If this is the next alloc size, we can use our optimized growth strategy.
  if (target_alloc_size == ComputeNextAllocSize(alloc_size())) {
    GrowToNextAllocSize(key_context);
    return;
  }

  // Create locals for the old state of the table.
  ssize_t old_size = alloc_size();
  CARBON_DCHECK(old_size > 0);
  bool old_small = is_small();
  Storage* old_storage = storage();
  uint8_t* old_metadata = metadata();
  EntryT* old_entries = entries();

  // Configure for the new size and allocate the new storage.
  alloc_size() = target_alloc_size;
  storage() = Allocate(target_alloc_size);
  std::memset(metadata(), 0, target_alloc_size);
  growth_budget_ = GrowthThresholdForAllocSize(target_alloc_size);

  // Just re-insert all the entries. As we're more than doubling the table
  // size, we don't bother with fancy optimizations here. Even using `memcpy`
  // for the entries seems unlikely to be a significant win given how sparse
  // the insertions will end up being.
  ssize_t count = 0;
  for (ssize_t group_index = 0; group_index < old_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(old_metadata, group_index);
    auto present_matched_range = g.MatchPresent();
    for (ssize_t byte_index : present_matched_range) {
      ++count;
      ssize_t index = group_index + byte_index;
      HashCode hash =
          key_context.HashKey(old_entries[index].key(), ComputeSeed());
      EntryT* new_entry = InsertIntoEmpty(hash);
      new_entry->MoveFrom(std::move(old_entries[index]));
    }
  }
  growth_budget_ -= count;

  if (!old_small) {
    // Old isn't a small buffer, so we need to deallocate it.
    Deallocate(old_storage, old_size);
  }
}

template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::GrowForInsertCountImpl(
    ssize_t count, KeyContextT key_context) -> void {
  if (count < growth_budget_) {
    // Already space for the needed growth.
    return;
  }

  // Currently, we don't account for any tombstones marking deleted elements,
  // and just conservatively ensure the growth will create adequate growth
  // budget for insertions. We could make this more precise by instead walking
  // the table and only counting present slots, as once we grow we'll be able
  // to reclaim all of the deleted slots. But this adds complexity and it isn't
  // clear this is necessary so we do the simpler conservative thing.
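  //
  // As a worked example of the computation below (with illustrative numbers):
  // if the current allocation's growth threshold is 56 and `growth_budget_` is
  // 46, then `used_budget` is 10. Growing for `count == 50` gives
  // `budget_needed == 60`, `space_needed == 60 + 60 / 7 == 68`, and a
  // `target_alloc_size` of 128 (the next power of two above 68). The extra
  // `budget_needed / 7` slack is what lets the `CARBON_CHECK` below assert
  // that the new allocation's growth threshold exceeds `budget_needed`.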
  ssize_t used_budget =
      GrowthThresholdForAllocSize(alloc_size()) - growth_budget_;
  ssize_t budget_needed = used_budget + count;
  ssize_t space_needed = budget_needed + (budget_needed / 7);
  ssize_t target_alloc_size = llvm::NextPowerOf2(space_needed);
  CARBON_CHECK(GrowthThresholdForAllocSize(target_alloc_size) >
               (budget_needed));
  GrowToAllocSizeImpl(target_alloc_size, key_context);
}

template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::EraseImpl(
    LookupKeyT lookup_key, KeyContextT key_context) -> bool {
  EntryT* entry = view_impl_.LookupEntry(lookup_key, key_context);
  if (!entry) {
    return false;
  }

  // If there are empty slots in this group then nothing will probe past this
  // group looking for an entry so we can simply set this slot to empty as
  // well. However, if every slot in this group is full, it might be part of
  // a long probe chain that we can't disrupt. In that case we mark the slot's
  // metadata as deleted to keep probes continuing past it.
  //
  // If we mark the slot as empty, we'll also need to increase the growth
  // budget.
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();
  ssize_t index = entry - local_entries;
  ssize_t group_index = index & ~GroupMask;
  auto g = MetadataGroup::Load(local_metadata, group_index);
  auto empty_matched_range = g.MatchEmpty();
  if (empty_matched_range) {
    local_metadata[index] = MetadataGroup::Empty;
    ++growth_budget_;
  } else {
    local_metadata[index] = MetadataGroup::Deleted;
  }

  if constexpr (!EntryT::IsTriviallyDestructible) {
    entry->Destroy();
  }

  return true;
}

template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::ClearImpl() -> void {
  view_impl_.ForEachEntry(
      [](EntryT& entry) {
        if constexpr (!EntryT::IsTriviallyDestructible) {
          entry.Destroy();
        }
      },
      [](uint8_t* metadata_group) {
        // Clear the group.
        std::memset(metadata_group, 0, GroupSize);
      });
  growth_budget_ = GrowthThresholdForAllocSize(alloc_size());
}

// Allocates the appropriate memory layout for a table of the given
// `alloc_size`, with space both for the metadata array and entries.
//
// The returned pointer *must* be deallocated by calling the below `Deallocate`
// function with the same `alloc_size` as used here.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Allocate(
    ssize_t alloc_size) -> Storage* {
  return reinterpret_cast<Storage*>(__builtin_operator_new(
      ViewImplT::AllocByteSize(alloc_size),
      static_cast<std::align_val_t>(Alignment), std::nothrow_t()));
}

// Deallocates a table's storage that was allocated with the `Allocate`
// function.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Deallocate(
    Storage* storage, ssize_t alloc_size) -> void {
  ssize_t allocated_size = ViewImplT::AllocByteSize(alloc_size);
  // We may not use the size below (when sized deallocation is unavailable),
  // but make sure it always compiles.
  static_cast<void>(allocated_size);
  __builtin_operator_delete(storage,
#if __cpp_sized_deallocation
                            allocated_size,
#endif
                            static_cast<std::align_val_t>(Alignment));
}

// Construct a table using the provided small storage if `small_alloc_size_` is
// non-zero. If `small_alloc_size_` is zero, then `small_storage` won't be used
// and can be null. Regardless, after this the storage pointer is non-null and
// the size is non-zero so that we can directly begin inserting into or
// querying the table.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Construct(
    Storage* small_storage) -> void {
  if (small_alloc_size_ > 0) {
    alloc_size() = small_alloc_size_;
    storage() = small_storage;
  } else {
    // Directly allocate the initial buffer so that the hashtable is never in
    // an empty state.
    alloc_size() = MinAllocatedSize;
    storage() = Allocate(MinAllocatedSize);
  }
  std::memset(metadata(), 0, alloc_size());
  growth_budget_ = GrowthThresholdForAllocSize(alloc_size());
}

// Destroy the current table, releasing any memory used.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Destroy() -> void {
  // Check for a moved-from state and don't do anything. Only a moved-from
  // table has a zero size.
  if (alloc_size() == 0) {
    return;
  }

  // Destroy all the entries.
  if constexpr (!EntryT::IsTriviallyDestructible) {
    view_impl_.ForEachEntry([](EntryT& entry) { entry.Destroy(); },
                            [](auto...) {});
  }

  // If small, nothing to deallocate.
  if (is_small()) {
    return;
  }

  // Just deallocate the storage without updating anything when destroying the
  // object.
  Deallocate(storage(), alloc_size());
}

// Copy all of the slots over from another table that has exactly the same
// allocation size.
//
// This requires the current table to already have storage allocated and set up
// but not initialized (or already cleared). It directly overwrites the table's
// storage to match the incoming argument.
//
// Despite being used in construction, this shouldn't be called for a moved-from
// `arg` -- in practice it is better for callers to handle that case when
// setting up storage.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::CopySlotsFrom(
    const BaseImpl& arg) -> void
  requires(EntryT::IsCopyable)
{
  CARBON_DCHECK(alloc_size() == arg.alloc_size());
  ssize_t local_size = alloc_size();

  // Preserve which slot every entry is in, including tombstones in the
  // metadata, in order to copy into the new table's storage without rehashing
  // all of the keys. This is especially important as we don't have an easy way
  // to access the key context needed for rehashing here.
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();
  const uint8_t* local_arg_metadata = arg.metadata();
  const EntryT* local_arg_entries = arg.entries();
  memcpy(local_metadata, local_arg_metadata, local_size);
  for (ssize_t group_index = 0; group_index < local_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(local_arg_metadata, group_index);
    for (ssize_t byte_index : g.MatchPresent()) {
      local_entries[group_index + byte_index].CopyFrom(
          local_arg_entries[group_index + byte_index]);
    }
  }
}

// Move from another table to this one.
//
// Note that the `small_storage` is *this* table's small storage pointer,
// provided from the `TableImpl` to this `BaseImpl` method as an argument.
//
// Requires the table to have its size and growth budget already set up, but
// otherwise the table must not yet be initialized. Notably, storage should
// either not yet be constructed or already destroyed. This both sets up the
// storage and handles moving any slots as needed.
//
// Note that because this is used in construction it needs to handle a
// moved-from `arg`.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::MoveFrom(
    BaseImpl&& arg, Storage* small_storage) -> void {
  ssize_t local_size = alloc_size();
  CARBON_DCHECK(local_size == arg.alloc_size());
  // If `arg` is moved-from, skip the rest as the local size is all we need.
  if (local_size == 0) {
    return;
  }

  if (arg.is_small()) {
    CARBON_DCHECK(local_size == small_alloc_size_);
    this->storage() = small_storage;
    // For small tables, we have to move the entries because we can't take
    // ownership of the small buffer itself. We do this preserving their slots
    // and even tombstones to avoid rehashing.
    uint8_t* local_metadata = this->metadata();
    EntryT* local_entries = this->entries();
    uint8_t* local_arg_metadata = arg.metadata();
    EntryT* local_arg_entries = arg.entries();
    memcpy(local_metadata, local_arg_metadata, local_size);
    if (EntryT::IsTriviallyRelocatable) {
      memcpy(local_entries, local_arg_entries, local_size * sizeof(EntryT));
    } else {
      for (ssize_t group_index = 0; group_index < local_size;
           group_index += GroupSize) {
        auto g = MetadataGroup::Load(local_arg_metadata, group_index);
        for (ssize_t byte_index : g.MatchPresent()) {
          local_entries[group_index + byte_index].MoveFrom(
              std::move(local_arg_entries[group_index + byte_index]));
        }
      }
    }
  } else {
    // Just point to the allocated storage.
    storage() = arg.storage();
  }

  // Finally, put the incoming table into a moved-from state.
  arg.alloc_size() = 0;
  // Replace the pointer with null to ease debugging.
  arg.storage() = nullptr;
}

// Optimized routine to insert a key into a table when that key *definitely*
// isn't present in the table and the table *definitely* has a viable empty
// slot (and growth space) to insert into before any deleted slots. When both
// of these are true, typically just after growth, we can dramatically simplify
// the insert position search.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::InsertIntoEmpty(
    HashCode hash) -> EntryT* {
  auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();

  for (ProbeSequence s(hash_index, alloc_size());; s.Next()) {
    ssize_t group_index = s.index();
    auto g = MetadataGroup::Load(local_metadata, group_index);

    if (auto empty_match = g.MatchEmpty()) {
      ssize_t index = group_index + empty_match.index();
      local_metadata[index] = tag | MetadataGroup::PresentMask;
      return &local_entries[index];
    }

    // Otherwise we continue probing.
  }
}
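
// (Illustrative note on InsertIntoEmpty above, using hypothetical values and
// assuming `MetadataGroup::PresentMask` is the high bit of a metadata byte: a
// hash whose 7-bit tag is 0b010'1101 gets the byte 0b1010'1101 stored in the
// metadata, while the remaining hash bits select the starting group of the
// probe sequence. The loop can rely on eventually finding an empty byte
// because callers guarantee growth space before calling this routine.)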

// Apply our doubling growth strategy and (re-)check invariants around table
// size.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::ComputeNextAllocSize(
    ssize_t old_alloc_size) -> ssize_t {
  CARBON_DCHECK(llvm::isPowerOf2_64(old_alloc_size),
                "Expected a power of two!");
  ssize_t new_alloc_size;
  bool overflow = __builtin_mul_overflow(old_alloc_size, 2, &new_alloc_size);
  CARBON_CHECK(!overflow, "Computing the new size overflowed `ssize_t`!");
  return new_alloc_size;
}

// Compute the growth threshold for a given size.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT,
              InputKeyContextT>::GrowthThresholdForAllocSize(ssize_t alloc_size)
    -> ssize_t {
  // We use a 7/8ths load factor to trigger growth.
  return alloc_size - alloc_size / 8;
}
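
// (Illustrative note on the threshold above: for an alloc_size of 64 the
// threshold is 64 - 64/8 == 56, so the table holds up to 56 present entries
// before an insertion forces growth; after doubling to 128 the threshold
// becomes 112.)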

// Optimized routine for growing to the next alloc size.
//
// A particularly common and important-to-optimize path is growing to the next
// alloc size, which will always be a doubling of the allocated size. This
// allows an important optimization -- we're adding exactly one more high bit
// to the hash-computed index for each entry. This in turn means we can
// classify every entry in the table into three cases:
//
// 1) The new high bit is zero, so the entry is at the same index in the new
//    table as in the old.
//
// 2) The new high bit is one, so the entry is at the old index plus the old
//    size.
//
// 3) The entry's current index doesn't match the initial hash index because
//    it required some amount of probing to find an empty slot.
//
// The design of the hash table tries to minimize how many entries fall into
// case (3), so we expect the vast majority of entries to be in (1) or (2).
// This lets us model growth notionally as copying the hashtable twice, into
// the lower and higher halves of the new allocation, clearing out the
// now-empty slots (from both deleted entries and entries in the other half of
// the table after growth), and re-inserting any probed elements. That model in
// turn is much more efficient than re-inserting all of the elements as it
// avoids the unnecessary parts of insertion and avoids interleaving random
// accesses for the probed elements. But most importantly, for trivially
// relocatable types it allows us to use `memcpy` rather than moving the
// elements individually.
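//
// (Illustrative sketch with hypothetical sizes: growing from an old size of 64
// to a new size of 128 adds one bit to the masked hash index. An entry stored
// in the group at index 16 whose hash also maps to that group ends up at group
// 16 or group 80 (16 | 64) in the new table, depending on the newly exposed
// hash bit -- cases (1) and (2). An entry that only landed in group 16 after
// probing away from its hash group is case (3): its slot is cleared in both
// halves and it is re-inserted below via `InsertIntoEmpty`.)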
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::GrowToNextAllocSize(
    KeyContextT key_context) -> void {
  // We collect the probed elements in a small vector for re-insertion. It is
  // tempting to reuse the already allocated storage, but doing so appears to
  // be a (very slight) performance regression. These are relatively rare and
  // storing them into the existing storage creates stores to the same regions
  // of memory we're reading. Moreover, it requires moving both the key and the
  // value twice, and doing the `memcpy` widening for relocatable types before
  // the group walk rather than after the group walk. In practice, between the
  // statistical rareness and using a large small-size buffer here on the
  // stack, we can handle this most efficiently with temporary, additional
  // storage.
  llvm::SmallVector<std::pair<ssize_t, HashCode>, 128> probed_indices;

  // Create locals for the old state of the table.
  ssize_t old_size = alloc_size();
  CARBON_DCHECK(old_size > 0);
  bool old_small = is_small();
  Storage* old_storage = storage();
  uint8_t* old_metadata = metadata();
  EntryT* old_entries = entries();

#ifndef NDEBUG
  // Count how many of the old table slots will end up being empty after we
  // grow the table. This includes both the currently empty slots and the
  // deleted slots, because we clear the latter to empty and re-insert
  // everything that had any probing.
  ssize_t debug_empty_count =
      llvm::count(llvm::ArrayRef(old_metadata, old_size), MetadataGroup::Empty);
  ssize_t debug_deleted_count = llvm::count(
      llvm::ArrayRef(old_metadata, old_size), MetadataGroup::Deleted);
  CARBON_DCHECK(
      debug_empty_count >= (old_size - GrowthThresholdForAllocSize(old_size)),
      "debug_empty_count: {0}, debug_deleted_count: {1}, size: {2}",
      debug_empty_count, debug_deleted_count, old_size);
#endif

  // Configure for the new size and allocate the new storage.
  ssize_t new_size = ComputeNextAllocSize(old_size);
  alloc_size() = new_size;
  storage() = Allocate(new_size);
  growth_budget_ = GrowthThresholdForAllocSize(new_size);

  // Now extract the new components of the table.
  uint8_t* new_metadata = metadata();
  EntryT* new_entries = entries();

  // Walk the metadata groups, clearing deleted to empty, duplicating the
  // metadata for the low and high halves, and updating it based on where each
  // entry will go in the new table. The updated metadata group is written to
  // the new table, and for non-trivially relocatable entry types, the entry is
  // also moved to its new location.
  ssize_t count = 0;
  for (ssize_t group_index = 0; group_index < old_size;
       group_index += GroupSize) {
    auto low_g = MetadataGroup::Load(old_metadata, group_index);
    // Make sure to match present elements first to enable pipelining with
    // clearing.
    auto present_matched_range = low_g.MatchPresent();
    low_g.ClearDeleted();
    MetadataGroup high_g;
    if constexpr (MetadataGroup::FastByteClear) {
      // When we have a fast byte clear, we can update the metadata for the
      // growth in-register and store at the end.
      high_g = low_g;
    } else {
      // If we don't have a fast byte clear, we can store the metadata group
      // eagerly here and overwrite bytes with a byte store below instead of
      // clearing the byte in-register.
      low_g.Store(new_metadata, group_index);
      low_g.Store(new_metadata, group_index | old_size);
    }

    for (ssize_t byte_index : present_matched_range) {
      ++count;
      ssize_t old_index = group_index + byte_index;
      if constexpr (!MetadataGroup::FastByteClear) {
        CARBON_DCHECK(new_metadata[old_index] == old_metadata[old_index]);
        CARBON_DCHECK(new_metadata[old_index | old_size] ==
                      old_metadata[old_index]);
      }
      HashCode hash =
          key_context.HashKey(old_entries[old_index].key(), ComputeSeed());
      ssize_t old_hash_index = hash.ExtractIndexAndTag<7>().first &
                               ComputeProbeMaskFromSize(old_size);
      if (LLVM_UNLIKELY(old_hash_index != group_index)) {
        probed_indices.push_back({old_index, hash});
        if constexpr (MetadataGroup::FastByteClear) {
          low_g.ClearByte(byte_index);
          high_g.ClearByte(byte_index);
        } else {
          new_metadata[old_index] = MetadataGroup::Empty;
          new_metadata[old_index | old_size] = MetadataGroup::Empty;
        }
        continue;
      }
      ssize_t new_index = hash.ExtractIndexAndTag<7>().first &
                          ComputeProbeMaskFromSize(new_size);
      CARBON_DCHECK(new_index == old_hash_index ||
                    new_index == (old_hash_index | old_size));
      // Toggle the newly added bit of the index to get to the other possible
      // target index.
      if constexpr (MetadataGroup::FastByteClear) {
        (new_index == old_hash_index ? high_g : low_g).ClearByte(byte_index);
        new_index += byte_index;
      } else {
        new_index += byte_index;
        new_metadata[new_index ^ old_size] = MetadataGroup::Empty;
      }

      // If we need to explicitly move (and destroy) the key or value, do so
      // here where we already know its target.
      if constexpr (!EntryT::IsTriviallyRelocatable) {
        new_entries[new_index].MoveFrom(std::move(old_entries[old_index]));
      }
    }

    if constexpr (MetadataGroup::FastByteClear) {
      low_g.Store(new_metadata, group_index);
      high_g.Store(new_metadata, (group_index | old_size));
    }
  }
  CARBON_DCHECK((count - static_cast<ssize_t>(probed_indices.size())) ==
                (new_size - llvm::count(llvm::ArrayRef(new_metadata, new_size),
                                        MetadataGroup::Empty)));
#ifndef NDEBUG
  CARBON_DCHECK((debug_empty_count + debug_deleted_count) ==
                (old_size - count));
  CARBON_DCHECK(llvm::count(llvm::ArrayRef(new_metadata, new_size),
                            MetadataGroup::Empty) ==
                debug_empty_count + debug_deleted_count +
                    static_cast<ssize_t>(probed_indices.size()) + old_size);
#endif

  // If the keys or values are trivially relocatable, we do a bulk memcpy of
  // them into place. This will copy them into both possible locations, which
  // is fine. One will correspond to an empty slot and will either be clobbered
  // if that slot is reused or simply ignored; the other will be the one used.
  // This might seem to require that it be valid to create two copies, but it
  // doesn't: it produces the exact same storage as copying the entry into the
  // wrong location first, and then again into the correct location. Only one
  // is live and only one is destroyed.
  if constexpr (EntryT::IsTriviallyRelocatable) {
    memcpy(new_entries, old_entries, old_size * sizeof(EntryT));
    memcpy(new_entries + old_size, old_entries, old_size * sizeof(EntryT));
  }
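
  // (Illustrative note on the bulk copy above, assuming a trivially
  // relocatable EntryT such as an integer key and pointer value: the bytes of
  // an entry at old index 20 now exist at both new index 20 and new index 84
  // (20 | 64, with a hypothetical old size of 64), but only the copy whose
  // metadata byte remains marked present is ever read or destroyed; the other
  // copy is inert bytes in an empty slot.)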

  // We then need to do a normal insertion for anything that was probed before
  // growth, but we know we'll find an empty slot, so leverage that.
  for (auto [old_index, hash] : probed_indices) {
    EntryT* new_entry = InsertIntoEmpty(hash);
    new_entry->MoveFrom(std::move(old_entries[old_index]));
  }
  CARBON_DCHECK(count ==
                (new_size - llvm::count(llvm::ArrayRef(new_metadata, new_size),
                                        MetadataGroup::Empty)));
  growth_budget_ -= count;
  CARBON_DCHECK(growth_budget_ ==
                (GrowthThresholdForAllocSize(new_size) -
                 (new_size - llvm::count(llvm::ArrayRef(new_metadata, new_size),
                                         MetadataGroup::Empty))));
  CARBON_DCHECK(growth_budget_ > 0,
                "Must still have a growth budget after rehash!");

  if (!old_small) {
    // Old isn't a small buffer, so we need to deallocate it.
    Deallocate(old_storage, old_size);
  }
}

// Grow the hashtable to create space and then insert into it. Returns the
// selected insertion entry. Never returns null. In addition to growing and
// selecting the insertion entry, this routine updates the metadata array so
// that this function can be directly called and the result returned from
// `InsertImpl`.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
[[clang::noinline]] auto
BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::GrowAndInsert(
    HashCode hash, KeyContextT key_context) -> EntryT* {
  GrowToNextAllocSize(key_context);

  // And insert the key into a slot in the newly grown table, returning that
  // entry for use.
  --growth_budget_;
  return InsertIntoEmpty(hash);
}

template <typename InputBaseT, ssize_t SmallSize>
TableImpl<InputBaseT, SmallSize>::TableImpl(const TableImpl& arg)
  requires(BaseT::EntryT::IsCopyable)
    : BaseT(arg.alloc_size(), arg.growth_budget_, SmallSize) {
  // Check for completely broken objects. These invariants should be true even
  // in a moved-from state.
  CARBON_DCHECK(arg.alloc_size() == 0 || !arg.is_small() ||
                arg.alloc_size() == SmallSize);
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  CARBON_DCHECK(this->small_alloc_size_ == SmallSize);

  if (this->alloc_size() != 0) {
    SetUpStorage();
    this->CopySlotsFrom(arg);
  }
}

template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::operator=(const TableImpl& arg)
    -> TableImpl&
  requires(BaseT::EntryT::IsCopyable)
{
  // Check for completely broken objects. These invariants should be true even
  // in a moved-from state.
  CARBON_DCHECK(arg.alloc_size() == 0 || !arg.is_small() ||
                arg.alloc_size() == SmallSize);
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  CARBON_DCHECK(this->small_alloc_size_ == SmallSize);

  // We have to end up with an allocation size exactly equivalent to the
  // incoming argument to avoid re-hashing every entry in the table, which
  // isn't possible without key context.
  if (arg.alloc_size() == this->alloc_size()) {
    // There is no efficient way for self-assignment handling to fall out of
    // the implementation, so detect and bypass it here. Similarly, if both
    // tables are in a moved-from state, there is nothing to do.
    if (&arg == this || this->alloc_size() == 0) {
      return *this;
    }
    CARBON_DCHECK(arg.storage() != this->storage());
    if constexpr (!EntryT::IsTriviallyDestructible) {
      this->view_impl_.ForEachEntry([](EntryT& entry) { entry.Destroy(); },
                                    [](auto...) {});
    }
  } else {
    // The sizes don't match, so destroy everything and set up the table
    // storage again.
    this->Destroy();
    this->alloc_size() = arg.alloc_size();
    // If `arg` is moved-from, we've cleared out our elements and put ourselves
    // into a moved-from state. We're done.
    if (this->alloc_size() == 0) {
      return *this;
    }
    SetUpStorage();
  }
  this->growth_budget_ = arg.growth_budget_;
  this->CopySlotsFrom(arg);
  return *this;
}

// Puts the incoming table into a moved-from state that can be destroyed or
// re-initialized but must not be used otherwise.
template <typename InputBaseT, ssize_t SmallSize>
TableImpl<InputBaseT, SmallSize>::TableImpl(TableImpl&& arg) noexcept
    : BaseT(arg.alloc_size(), arg.growth_budget_, SmallSize) {
  // Check for completely broken objects. These invariants should be true even
  // in a moved-from state.
  CARBON_DCHECK(arg.alloc_size() == 0 || !arg.is_small() ||
                arg.alloc_size() == SmallSize);
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  CARBON_DCHECK(this->small_alloc_size_ == SmallSize);

  this->MoveFrom(std::move(arg), small_storage());
}

template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::operator=(TableImpl&& arg) noexcept
    -> TableImpl& {
  // Check for completely broken objects. These invariants should be true even
  // in a moved-from state.
  CARBON_DCHECK(arg.alloc_size() == 0 || !arg.is_small() ||
                arg.alloc_size() == SmallSize);
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  CARBON_DCHECK(this->small_alloc_size_ == SmallSize);

  // Destroy and deallocate our table.
  this->Destroy();
  // Defend against self-move by zeroing the size here before we start moving
  // out of `arg`.
  this->alloc_size() = 0;

  // Set up to match the argument and then finish the move.
  this->alloc_size() = arg.alloc_size();
  this->growth_budget_ = arg.growth_budget_;
  this->MoveFrom(std::move(arg), small_storage());
  return *this;
}

template <typename InputBaseT, ssize_t SmallSize>
TableImpl<InputBaseT, SmallSize>::~TableImpl() {
  this->Destroy();
}

// Reset a table to its original state, including releasing any allocated
// memory.
template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::ResetImpl() -> void {
  this->Destroy();

  // Re-initialize the whole thing.
  CARBON_DCHECK(this->small_alloc_size() == SmallSize);
  this->Construct(small_storage());
}

template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::small_storage() const -> Storage* {
  if constexpr (SmallSize > 0) {
    // Do a bunch of validation of the small size to establish our invariants
    // when we know we have a non-zero small size.
    static_assert(llvm::isPowerOf2_64(SmallSize),
                  "SmallSize must be a power of two for a hashed buffer!");
    static_assert(
        SmallSize >= MaxGroupSize,
        "We require all small sizes to be multiples of the largest group "
        "size supported to ensure it can be used portably.");
    static_assert(
        (SmallSize % MaxGroupSize) == 0,
        "Small size must be a multiple of the max group size supported "
        "so that we can allocate a whole number of groups.");
    // Implied by the asserts above.
    static_assert(SmallSize >= GroupSize);
    static_assert((SmallSize % GroupSize) == 0);

    static_assert(SmallSize >= alignof(StorageEntry<KeyT, ValueT>),
                  "Requested a small size that would require padding between "
                  "metadata bytes and correctly aligned key and value types. "
                  "Either a larger small size or a zero small size and heap "
                  "allocation are required for this key and value type.");
    static_assert(offsetof(SmallStorage, entries) == SmallSize,
                  "Offset to entries in small size storage doesn't match "
                  "computed offset!");
    return &small_storage_;
  } else {
    static_assert(
        sizeof(TableImpl) == sizeof(BaseT),
        "Empty small storage caused a size difference and wasted space!");
    return nullptr;
  }
}

// Helper to set up the storage of a table when a specific size has already
// been set up. If possible, uses any small storage; otherwise allocates.
template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::SetUpStorage() -> void {
  CARBON_DCHECK(this->small_alloc_size() == SmallSize);
  ssize_t local_size = this->alloc_size();
  CARBON_DCHECK(local_size != 0);
  if (local_size == SmallSize) {
    this->storage() = small_storage();
  } else {
    this->storage() = BaseT::Allocate(local_size);
  }
}

}  // namespace Carbon::RawHashtable

#endif  // CARBON_COMMON_RAW_HASHTABLE_H_