raw_hashtable.h 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_COMMON_RAW_HASHTABLE_H_
  5. #define CARBON_COMMON_RAW_HASHTABLE_H_
  6. #include <algorithm>
  7. #include <concepts>
  8. #include <cstddef>
  9. #include <cstring>
  10. #include <iterator>
  11. #include <new>
  12. #include <type_traits>
  13. #include <utility>
  14. #include "common/check.h"
  15. #include "common/hashing.h"
  16. #include "common/hashtable_key_context.h"
  17. #include "common/raw_hashtable_metadata_group.h"
  18. #include "llvm/Support/Compiler.h"
  19. #include "llvm/Support/MathExtras.h"
  20. // A namespace collecting a set of low-level utilities for building hashtable
  21. // data structures. These should only be used as implementation details of
  22. // higher-level data-structure APIs.
  23. //
  24. // The utilities here use the `hashtable_key_context.h` provided `KeyContext` to
  25. // support the necessary hashtable operations on keys: hashing and comparison.
  26. // This also serves as the customization point for hashtables built on this
  27. // infrastructure for those operations. See that header file for details.
  28. //
  29. // These utilities support hashtables following a *specific* API design pattern,
  30. // and using Small-Size Optimization, or "SSO", when desired. We expect there to
  31. // be three layers to any hashtable design:
  32. //
  33. // - A *view* type: a read-only view of the hashtable contents. This type should
  34. // be a value type and is expected to be passed by-value in APIs. However, it
  35. // will have `const`-reference semantics, much like a `std::string_view`. Note
  36. // that the *entries* will continue to be mutable, it is only the *table* that
  37. // is read-only.
  38. //
  39. // - A *base* type: a base class type of the actual hashtable, which allows
  40. // almost all mutable operations but erases any specific SSO buffer size.
  41. // Because this is a base of the actual hash table, it is designed to be
  42. // passed as a non-`const` reference or pointer.
  43. //
  44. // - A *table* type: the actual hashtable which derives from the base type and
  45. // adds any desired SSO storage buffer. Beyond the physical storage, it also
  46. // allows resetting the table to its initial state & allocated size, as well
  47. // as copying and moving the table.
  48. //
  49. // For complete examples of the API design, see `set.h` for a hashtable-based
  50. // set data structure, and `map.h` for a hashtable-based map data structure.
  51. //
  52. // The hashtable design implemented here has several key invariants and design
  53. // elements that are essential to all three of the types above and the
  54. // functionality they provide.
  55. //
  56. // - The underlying hashtable uses [open addressing], a power-of-two table size,
  57. // and quadratic probing rather than closed addressing and chaining.
  58. //
  59. // [open addressing]: https://en.wikipedia.org/wiki/Open_addressing
  60. //
  61. // - Each _slot_ in the table corresponds to a key, a value, and one byte of
  62. // metadata. Each _entry_ is a key and value. The key and value for an entry
  63. // are stored together.
  64. //
  65. // - The allocated storage is organized into an array of metadata bytes followed
  66. // by an array of entry storage.
  67. //
  68. // - The metadata byte corresponding to each entry marks that entry is either
  69. // empty, deleted, or present. When present, a 7-bit tag is also stored using
  70. // another 7 bits from the hash of the entry key.
  71. //
  72. // - The storage for an entry is an internal type that should not be exposed to
  73. // users, and instead only the underlying keys and values.
  74. //
  75. // - The hash addressing and probing occurs over *groups* of slots rather than
  76. // individual entries. When inserting a new entry, it can be added to the
  77. // group it hashes to as long as it is not full, and can even replace a slot with
  78. // a tombstone indicating a previously deleted entry. Only when the group is
  79. // full will it look at the next group in the probe sequence. As a result,
  80. // there may be entries in a group where a different group is the start of
  81. // that entry's probe sequence. Also, when performing a lookup, every group in
  82. // the probe sequence must be inspected for the lookup key until it is found
  83. // or the group has an empty slot.
  84. //
  85. // - Groups are scanned rapidly using the one-byte metadata for each entry in
  86. // the group and CPU instructions that allow comparing all of the metadata for
  87. // a group in parallel. For more details on the metadata group encoding and
  88. // scanning, see `raw_hashtable_metadata_group.h`.
  89. //
  90. // - `GroupSize` is a platform-specific relatively small power of two that fits
  91. // in some hardware register. However, `MaxGroupSize` is provided as a
  92. // portable max that is also a power of two. The table storage, whether
  93. // provided by an SSO buffer or allocated, is required to be a multiple of
  94. // `MaxGroupSize` to keep the requirement portable but sufficient for all
  95. // platforms.
  96. //
  97. // - There is *always* an allocated table of some multiple of `MaxGroupSize`.
  98. // This allows accesses to be branchless. When heap allocated, we pro-actively
  99. // allocate at least a minimum heap size table. When there is a small-size
  100. // optimization (SSO) buffer, that provides the initial allocation.
  101. //
  102. // - The table performs a minimal amount of bookkeeping that limits the APIs it
  103. // can support:
  104. // - `alloc_size` is the size of the table *allocated* (not *used*), and is
  105. // always a power of 2 at least as big as `MinAllocatedSize`.
  106. // - `storage` is a pointer to the storage for the `alloc_size` slots of the
  107. // table, and never null.
  108. // - `small_alloc_size` is the maximum `alloc_size` where the table is stored
  109. // in the object itself instead of separately on the heap. In this case,
  110. // `storage` points to `small_storage_`.
  111. // - `growth_budget` is the number of entries that may be added before the
  112. // table allocation is doubled. It is always
  113. // `GrowthThresholdForAllocSize(alloc_size)` minus the number of
  114. // non-empty (filled or deleted) slots. If it ever falls to 0, the table
  115. // is grown to keep it greater than 0.
  116. // There is also the "moved-from" state where the table may only be
  117. // reinitialized or destroyed where the `alloc_size` is 0 and `storage` is
  118. // null. Since it doesn't track the exact number of filled entries in a table,
  119. // it doesn't support a container-style `size` API.
  120. //
  121. // - There is no direct iterator support because of the complexity of embedding
  122. // the group-based metadata scanning into an iterator model. Instead, there is
  123. // just a for-each method that is passed a lambda to observe all entries. The
  124. // order of this observation is also not guaranteed.
  125. namespace Carbon::RawHashtable {
  126. // If allocating storage, allocate a minimum of one cacheline of group metadata
  127. // or a minimum of one group, whichever is larger.
  128. constexpr ssize_t MinAllocatedSize = std::max<ssize_t>(64, MaxGroupSize);
  129. // An entry in the hashtable storage of a `KeyT` and `ValueT` object.
  130. //
  131. // Allows manual construction, destruction, and access to these values so we can
  132. // create arrays of the entries prior to populating them with actual keys and
  133. // values.
  134. template <typename KeyT, typename ValueT>
  135. struct StorageEntry {
  136. static constexpr bool IsTriviallyDestructible =
  137. std::is_trivially_destructible_v<KeyT> &&
  138. std::is_trivially_destructible_v<ValueT>;
  139. static constexpr bool IsTriviallyRelocatable =
  140. IsTriviallyDestructible && std::is_trivially_move_constructible_v<KeyT> &&
  141. std::is_trivially_move_constructible_v<ValueT>;
  142. auto key() const -> const KeyT& {
  143. // Ensure we don't need more alignment than available. Inside a method body
  144. // to apply to the complete type.
  145. static_assert(
  146. alignof(StorageEntry) <= MinAllocatedSize,
  147. "The minimum allocated size turns into the alignment of our array of "
  148. "storage entries as they follow the metadata byte array.");
  149. return *std::launder(reinterpret_cast<const KeyT*>(&key_storage));
  150. }
  151. auto key() -> KeyT& {
  152. return const_cast<KeyT&>(const_cast<const StorageEntry*>(this)->key());
  153. }
  154. auto value() const -> const ValueT& {
  155. return *std::launder(reinterpret_cast<const ValueT*>(&value_storage));
  156. }
  157. auto value() -> ValueT& {
  158. return const_cast<ValueT&>(const_cast<const StorageEntry*>(this)->value());
  159. }
  160. // We handle destruction and move manually as we only want to expose distinct
  161. // `KeyT` and `ValueT` subobjects to user code that may need to do in-place
  162. // construction. As a consequence, this struct only provides the storage and
  163. // we have to manually manage the construction, move, and destruction of the
  164. // objects.
  165. auto Destroy() -> void {
  166. static_assert(!IsTriviallyDestructible,
  167. "Should never instantiate when trivial!");
  168. key().~KeyT();
  169. value().~ValueT();
  170. }
  171. auto CopyFrom(const StorageEntry& entry) -> void {
  172. if constexpr (IsTriviallyRelocatable) {
  173. memcpy(this, &entry, sizeof(StorageEntry));
  174. } else {
  175. new (&key_storage) KeyT(entry.key());
  176. new (&value_storage) ValueT(entry.value());
  177. }
  178. }
  179. // Move from an expiring entry and destroy that entry's key and value.
  180. // Optimizes to directly use `memcpy` when correct.
  181. auto MoveFrom(StorageEntry&& entry) -> void {
  182. if constexpr (IsTriviallyRelocatable) {
  183. memcpy(this, &entry, sizeof(StorageEntry));
  184. } else {
  185. new (&key_storage) KeyT(std::move(entry.key()));
  186. entry.key().~KeyT();
  187. new (&value_storage) ValueT(std::move(entry.value()));
  188. entry.value().~ValueT();
  189. }
  190. }
  191. alignas(KeyT) std::byte key_storage[sizeof(KeyT)];
  192. alignas(ValueT) std::byte value_storage[sizeof(ValueT)];
  193. };
  194. // A specialization of the storage entry for sets without a distinct value type.
  195. // Somewhat duplicative with the key-value version, but C++ specialization makes
  196. // doing better difficult.
  197. template <typename KeyT>
  198. struct StorageEntry<KeyT, void> {
  199. static constexpr bool IsTriviallyDestructible =
  200. std::is_trivially_destructible_v<KeyT>;
  201. static constexpr bool IsTriviallyRelocatable =
  202. IsTriviallyDestructible && std::is_trivially_move_constructible_v<KeyT>;
  203. auto key() const -> const KeyT& {
  204. // Ensure we don't need more alignment than available.
  205. static_assert(
  206. alignof(StorageEntry) <= MinAllocatedSize,
  207. "The minimum allocated size turns into the alignment of our array of "
  208. "storage entries as they follow the metadata byte array.");
  209. return *std::launder(reinterpret_cast<const KeyT*>(&key_storage));
  210. }
  211. auto key() -> KeyT& {
  212. return const_cast<KeyT&>(const_cast<const StorageEntry*>(this)->key());
  213. }
  214. auto Destroy() -> void {
  215. static_assert(!IsTriviallyDestructible,
  216. "Should never instantiate when trivial!");
  217. key().~KeyT();
  218. }
  219. auto CopyFrom(const StorageEntry& entry) -> void {
  220. if constexpr (IsTriviallyRelocatable) {
  221. memcpy(this, &entry, sizeof(StorageEntry));
  222. } else {
  223. new (&key_storage) KeyT(entry.key());
  224. }
  225. }
  226. auto MoveFrom(StorageEntry&& entry) -> void {
  227. if constexpr (IsTriviallyRelocatable) {
  228. memcpy(this, &entry, sizeof(StorageEntry));
  229. } else {
  230. new (&key_storage) KeyT(std::move(entry.key()));
  231. entry.key().~KeyT();
  232. }
  233. }
  234. alignas(KeyT) std::byte key_storage[sizeof(KeyT)];
  235. };
  236. struct Metrics {
  237. // How many keys are present in the table.
  238. ssize_t key_count = 0;
  239. // How many slots of the table are reserved due to deleted markers required to
  240. // preserve probe sequences.
  241. ssize_t deleted_count = 0;
  242. // How many bytes of allocated storage are used by the table. Note, does not
  243. // include the table object or any small-size buffer.
  244. ssize_t storage_bytes = 0;
  245. // How many keys have required probing beyond the initial group. These are the
  246. // keys with a probe distance > 0.
  247. ssize_t probed_key_count = 0;
  248. // The probe distance averaged over every key. If every key is in its initial
  249. // group, this will be zero as no keys will have a larger probe distance. In
  250. // general, we want this to be as close to zero as possible.
  251. double probe_avg_distance = 0.0;
  252. // The maximum probe distance found for a single key in the table.
  253. ssize_t probe_max_distance = 0;
  254. // The average number of probing comparisons required to locate a specific key
  255. // in the table. This is how many comparisons are required *before* the key is
  256. // located, or the *failed* comparisons. We always have to do one successful
  257. // comparison at the end. This successful comparison isn't counted because
  258. // that focuses this metric on the overhead the table is introducing, and
  259. // keeps a "perfect" table with an average of `0.0` here similar to the
  260. // perfect average of `0.0` average probe distance.
  261. double probe_avg_compares = 0.0;
  262. // The maximum number of probing comparisons required to locate a specific
  263. // key in the table.
  264. ssize_t probe_max_compares = 0;
  265. };
  266. // A placeholder empty type used to model pointers to the allocated buffer of
  267. // storage.
  268. //
  269. // The allocated storage doesn't have a meaningful static layout -- it consists
  270. // of an array of metadata groups followed by an array of storage entries.
  271. // However, we want to be able to mark pointers to this and so use pointers to
  272. // this placeholder type as that signifier.
  273. //
  274. // This is a complete, empty type so that it can be used as a base class of a
  275. // specific concrete storage type for compile-time sized storage.
  276. struct Storage {};
  277. // Forward declaration to support friending, see the definition below.
  278. template <typename KeyT, typename ValueT = void,
  279. typename InputKeyContextT = DefaultKeyContext>
  280. class BaseImpl;
  281. // Implementation helper for defining a read-only view type for a hashtable.
  282. //
  283. // A specific user-facing hashtable view type should derive privately from this
  284. // type, and forward the implementation of its interface to functions in this
  285. // type.
  286. //
  287. // The methods available to user-facing hashtable types are `protected`, and
  288. // where they are expected to directly map to a public API, named with an
  289. // `Impl`. The suffix naming ensures types don't `using` in these low-level APIs
  290. // but declare their own and implement them by forwarding to these APIs. We
  291. // don't want users to have to read these implementation details to understand
  292. // their container's API, so none of these methods should be `using`-ed into the
  293. // user facing types.
  294. //
  295. // Some of the types are just convenience aliases and aren't important to
  296. // surface as part of the user-facing type API for readers and so those are
  297. // reasonable to add via a `using`.
  298. //
  299. // Some methods are used by other parts of the raw hashtable implementation.
  300. // Those are kept `private` and where necessary the other components of the raw
  301. // hashtable implementation are friended to give access to them.
  302. template <typename InputKeyT, typename InputValueT = void,
  303. typename InputKeyContextT = DefaultKeyContext>
  304. class ViewImpl {
  305. protected:
  306. using KeyT = InputKeyT;
  307. using ValueT = InputValueT;
  308. using KeyContextT = InputKeyContextT;
  309. using EntryT = StorageEntry<KeyT, ValueT>;
  310. using MetricsT = Metrics;
  311. friend class BaseImpl<KeyT, ValueT, KeyContextT>;
  312. template <typename InputBaseT, ssize_t SmallSize>
  313. friend class TableImpl;
  314. // Make more-`const` types friends to enable conversions that add `const`.
  315. friend class ViewImpl<const KeyT, ValueT, KeyContextT>;
  316. friend class ViewImpl<KeyT, const ValueT, KeyContextT>;
  317. friend class ViewImpl<const KeyT, const ValueT, KeyContextT>;
  318. ViewImpl() = default;
  319. // Support adding `const` to either key or value type of some other view.
  320. template <typename OtherKeyT, typename OtherValueT>
  321. // NOLINTNEXTLINE(google-explicit-constructor)
  322. ViewImpl(ViewImpl<OtherKeyT, OtherValueT, KeyContextT> other_view)
  323. requires(std::same_as<KeyT, OtherKeyT> ||
  324. std::same_as<KeyT, const OtherKeyT>) &&
  325. (std::same_as<ValueT, OtherValueT> ||
  326. std::same_as<ValueT, const OtherValueT>)
  327. : alloc_size_(other_view.alloc_size_), storage_(other_view.storage_) {}
  328. // Looks up an entry in the hashtable and returns its address or null if not
  329. // present.
  330. template <typename LookupKeyT>
  331. auto LookupEntry(LookupKeyT lookup_key, KeyContextT key_context) const
  332. -> EntryT*;
  333. // Calls `entry_callback` for each entry in the hashtable. All the entries
  334. // within a specific group are visited first, and then `group_callback` is
  335. // called on the group itself. The `group_callback` is typically only used by
  336. // the internals of the hashtable.
  337. template <typename EntryCallbackT, typename GroupCallbackT>
  338. auto ForEachEntry(EntryCallbackT entry_callback,
  339. GroupCallbackT group_callback) const -> void;
  340. // Returns a collection of informative metrics on the the current state of the
  341. // table, useful for performance analysis. These include relatively slow to
  342. // compute metrics requiring deep inspection of the table's state.
  343. auto ComputeMetricsImpl(KeyContextT key_context) const -> MetricsT;
  344. private:
  345. ViewImpl(ssize_t alloc_size, Storage* storage)
  346. : alloc_size_(alloc_size), storage_(storage) {}
  347. // Computes the offset from the metadata array to the entries array for a
  348. // given size. This is trivial, but we use this routine to enforce invariants
  349. // on the sizes.
  350. static constexpr auto EntriesOffset(ssize_t alloc_size) -> ssize_t {
  351. CARBON_DCHECK(llvm::isPowerOf2_64(alloc_size))
  352. << "Size must be a power of two for a hashed buffer!";
  353. // The size is always a power of two. We prevent any too-small sizes so it
  354. // being a power of two provides the needed alignment. As a result, the
  355. // offset is exactly the size. We validate this here to catch alignment bugs
  356. // early.
  357. CARBON_DCHECK(static_cast<uint64_t>(alloc_size) ==
  358. llvm::alignTo<alignof(EntryT)>(alloc_size));
  359. return alloc_size;
  360. }
  361. // Compute the allocated table's byte size.
  362. static constexpr auto AllocByteSize(ssize_t alloc_size) -> ssize_t {
  363. return EntriesOffset(alloc_size) + sizeof(EntryT) * alloc_size;
  364. }
  365. auto metadata() const -> uint8_t* {
  366. return reinterpret_cast<uint8_t*>(storage_);
  367. }
  368. auto entries() const -> EntryT* {
  369. return reinterpret_cast<EntryT*>(reinterpret_cast<std::byte*>(storage_) +
  370. EntriesOffset(alloc_size_));
  371. }
  372. ssize_t alloc_size_;
  373. Storage* storage_;
  374. };
  375. // Implementation helper for defining a read-write base type for a hashtable
  376. // that type-erases any SSO buffer.
  377. //
  378. // A specific user-facing hashtable base type should derive using *`protected`*
  379. // inheritance from this type, and forward the implementation of its interface
  380. // to functions in this type.
  381. //
  382. // Other than the use of `protected` inheritance, the patterns for this type,
  383. // and how to build user-facing hashtable base types from it, mirror those of
  384. // `ViewImpl`. See its documentation for more details.
  385. template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
  386. class BaseImpl {
  387. protected:
  388. using KeyT = InputKeyT;
  389. using ValueT = InputValueT;
  390. using KeyContextT = InputKeyContextT;
  391. using ViewImplT = ViewImpl<KeyT, ValueT, KeyContextT>;
  392. using EntryT = typename ViewImplT::EntryT;
  393. using MetricsT = typename ViewImplT::MetricsT;
  394. BaseImpl(int small_alloc_size, Storage* small_storage)
  395. : small_alloc_size_(small_alloc_size) {
  396. CARBON_CHECK(small_alloc_size >= 0);
  397. Construct(small_storage);
  398. }
  399. // Only used for copying and moving, and leaves storage uninitialized.
  400. BaseImpl(ssize_t alloc_size, int growth_budget, int small_alloc_size)
  401. : view_impl_(alloc_size, nullptr),
  402. growth_budget_(growth_budget),
  403. small_alloc_size_(small_alloc_size) {}
  404. ~BaseImpl();
  405. // NOLINTNEXTLINE(google-explicit-constructor): Designed to implicitly decay.
  406. operator ViewImplT() const { return view_impl(); }
  407. auto view_impl() const -> ViewImplT { return view_impl_; }
  408. // Looks up the provided key in the hashtable. If found, returns a pointer to
  409. // that entry and `false`.
  410. //
  411. // If not found, will locate an empty entry for inserting into, set the
  412. // metadata for that entry, and return a pointer to the entry and `true`. When
  413. // necessary, this will grow the hashtable to cause there to be sufficient
  414. // empty entries.
  415. template <typename LookupKeyT>
  416. auto InsertImpl(LookupKeyT lookup_key, KeyContextT key_context)
  417. -> std::pair<EntryT*, bool>;
  418. // Grow the table to specific allocation size.
  419. //
  420. // This will grow the the table if necessary for it to have an allocation size
  421. // of `target_alloc_size` which must be a power of two. Note that this will
  422. // not allow that many keys to be inserted into the hashtable, but a smaller
  423. // number based on the load factor. If a specific number of insertions need to
  424. // be achieved without triggering growth, use the `GrowForInsertCountImpl`
  425. // method.
  426. auto GrowToAllocSizeImpl(ssize_t target_alloc_size, KeyContextT key_context)
  427. -> void;
  428. // Grow the table to allow inserting the specified number of keys.
  429. auto GrowForInsertCountImpl(ssize_t count, KeyContextT key_context) -> void;
  430. // Looks up the entry in the hashtable, and if found destroys the entry and
  431. // returns `true`. If not found, returns `false`.
  432. //
  433. // Does not release any memory, just leaves a tombstone behind so this entry
  434. // cannot be found and the slot can in theory be re-used.
  435. template <typename LookupKeyT>
  436. auto EraseImpl(LookupKeyT lookup_key, KeyContextT key_context) -> bool;
  437. // Erases all entries in the hashtable but leaves the allocated storage.
  438. auto ClearImpl() -> void;
  439. private:
  440. template <typename InputBaseT, ssize_t SmallSize>
  441. friend class TableImpl;
  442. static constexpr ssize_t Alignment = std::max<ssize_t>(
  443. {alignof(MetadataGroup), alignof(StorageEntry<KeyT, ValueT>)});
  444. // Implementation of inline small storage for the provided key type, value
  445. // type, and small size. Specialized for a zero small size to be an empty
  446. // struct.
  447. template <ssize_t SmallSize>
  448. struct SmallStorage : Storage {
  449. alignas(Alignment) uint8_t metadata[SmallSize];
  450. mutable StorageEntry<KeyT, ValueT> entries[SmallSize];
  451. };
  452. // Specialized storage with no inline buffer to avoid any extra alignment.
  453. template <>
  454. struct SmallStorage<0> {};
  455. static auto Allocate(ssize_t alloc_size) -> Storage*;
  456. static auto Deallocate(Storage* storage, ssize_t alloc_size) -> void;
  457. auto growth_budget() const -> ssize_t { return growth_budget_; }
  458. auto alloc_size() const -> ssize_t { return view_impl_.alloc_size_; }
  459. auto alloc_size() -> ssize_t& { return view_impl_.alloc_size_; }
  460. auto storage() const -> Storage* { return view_impl_.storage_; }
  461. auto storage() -> Storage*& { return view_impl_.storage_; }
  462. auto metadata() const -> uint8_t* { return view_impl_.metadata(); }
  463. auto entries() const -> EntryT* { return view_impl_.entries(); }
  464. auto small_alloc_size() const -> ssize_t {
  465. return static_cast<unsigned>(small_alloc_size_);
  466. }
  467. auto is_small() const -> bool {
  468. CARBON_DCHECK(alloc_size() >= small_alloc_size());
  469. return alloc_size() == small_alloc_size();
  470. }
  471. auto Construct(Storage* small_storage) -> void;
  472. auto Destroy() -> void;
  473. auto CopySlotsFrom(const BaseImpl& arg) -> void;
  474. auto MoveFrom(BaseImpl&& arg, Storage* small_storage) -> void;
  475. template <typename LookupKeyT>
  476. auto InsertIntoEmpty(LookupKeyT lookup_key, KeyContextT key_context)
  477. -> EntryT*;
  478. static auto ComputeNextAllocSize(ssize_t old_alloc_size) -> ssize_t;
  479. static auto GrowthThresholdForAllocSize(ssize_t alloc_size) -> ssize_t;
  480. auto GrowToNextAllocSize(KeyContextT key_context) -> void;
  481. template <typename LookupKeyT>
  482. auto GrowAndInsert(LookupKeyT lookup_key, KeyContextT key_context) -> EntryT*;
  483. ViewImplT view_impl_;
  484. int growth_budget_;
  485. int small_alloc_size_;
  486. };
// Implementation helper for defining a hashtable type with an SSO buffer.
//
// A specific user-facing hashtable should derive privately from this
// type, and forward the implementation of its interface to functions in this
// type. It should provide the corresponding user-facing hashtable base type as
// the `InputBaseT` type parameter (rather than a key/value pair), and this type
// will in turn derive from that provided base type. This allows derived-to-base
// conversion from the user-facing hashtable type to the user-facing hashtable
// base type. And it does so keeping the inheritance linear. The resulting
// linear inheritance hierarchy for a `Map<K, T>` type will look like:
//
//   Map<K, T>
//    ↓
//   TableImpl<MapBase<K, T>>
//    ↓
//   MapBase<K, T>
//    ↓
//   BaseImpl<K, T>
//
// Other than this inheritance technique, the patterns for this type, and how to
// build user-facing hashtable types from it, mirror those of `ViewImpl`. See
// its documentation for more details.
template <typename InputBaseT, ssize_t SmallSize>
class TableImpl : public InputBaseT {
 protected:
  using BaseT = InputBaseT;

  // Constructs the table using its SSO buffer as the initial storage.
  TableImpl() : BaseT(SmallSize, small_storage()) {}
  TableImpl(const TableImpl& arg);
  TableImpl(TableImpl&& arg) noexcept;
  auto operator=(const TableImpl& arg) -> TableImpl&;
  auto operator=(TableImpl&& arg) noexcept -> TableImpl&;

  // Resets the hashtable to its initial state, clearing all entries and
  // releasing all memory. If the hashtable had an SSO buffer, that is restored
  // as the storage. Otherwise, a minimum sized table storage is allocated.
  auto ResetImpl() -> void;

 private:
  using KeyT = BaseT::KeyT;
  using ValueT = BaseT::ValueT;
  using EntryT = BaseT::EntryT;
  using SmallStorage = BaseT::template SmallStorage<SmallSize>;

  auto small_storage() const -> Storage*;
  auto SetUpStorage() -> void;

  // The inline SSO buffer. `mutable` so a pointer to it can be handed out
  // from `const` methods; `[[no_unique_address]]` lets a zero-sized buffer
  // occupy no storage.
  [[no_unique_address]] mutable SmallStorage small_storage_;
};
  531. ////////////////////////////////////////////////////////////////////////////////
  532. //
  533. // Only implementation details below this point.
  534. //
  535. ////////////////////////////////////////////////////////////////////////////////
  536. // Computes a seed that provides a small amount of entropy from ASLR where
  537. // available with minimal cost. The priority is speed, and this computes the
  538. // entropy in a way that doesn't require loading from memory, merely accessing
  539. // entropy already available without accessing memory.
  540. inline auto ComputeSeed() -> uint64_t {
  541. // A global variable whose address is used as a seed. This allows ASLR to
  542. // introduce some variation in hashtable ordering when enabled via the code
  543. // model for globals.
  544. extern volatile std::byte global_addr_seed;
  545. return reinterpret_cast<uint64_t>(&global_addr_seed);
  546. }
// Computes the mask used to wrap probe indices for a table of `size` slots.
// The result keeps indices within `[0, size)` and aligned to group
// boundaries.
inline auto ComputeProbeMaskFromSize(ssize_t size) -> size_t {
  CARBON_DCHECK(llvm::isPowerOf2_64(size))
      << "Size must be a power of two for a hashed buffer!";
  // Since `size` is a power of two, we can make sure the probes are less
  // than `size` by making the mask `size - 1`. We also mask off the low
  // bits so the probes are a multiple of the size of the groups of entries.
  return (size - 1) & ~GroupMask;
}
// This class handles building a sequence of probe indices from a given
// starting point, including both the quadratic growth and masking the index
// to stay within the bucket array size. The starting point doesn't need to be
// clamped to the size ahead of time (or even be positive), we will do it
// internally.
//
// For reference on quadratic probing:
// https://en.wikipedia.org/wiki/Quadratic_probing
//
// We compute the quadratic probe index incrementally, but we can also compute
// it mathematically and will check that the incremental result matches our
// mathematical expectation. We use the quadratic probing formula of:
//
//   p(start, step) = (start + (step + step^2) / 2) (mod size / GroupSize)
//
// However, we compute it incrementally and scale all the variables by the group
// size so it can be used as an index without an additional multiplication.
class ProbeSequence {
 public:
  ProbeSequence(ssize_t start, ssize_t size) {
    mask_ = ComputeProbeMaskFromSize(size);
    p_ = start & mask_;
#ifndef NDEBUG
    start_ = start & mask_;
    size_ = size;
#endif
  }

  // Advances to the next index in the quadratic probe sequence.
  void Next() {
    step_ += GroupSize;
    p_ = (p_ + step_) & mask_;
#ifndef NDEBUG
    // Verify against the quadratic formula we expect to be following by scaling
    // everything down by `GroupSize`.
    CARBON_DCHECK(
        (p_ / GroupSize) ==
        ((start_ / GroupSize +
          (step_ / GroupSize + (step_ / GroupSize) * (step_ / GroupSize)) / 2) %
         (size_ / GroupSize)))
        << "Index in probe sequence does not match the expected formula.";
    CARBON_DCHECK(step_ < size_) << "We necessarily visit all groups, so we "
                                    "can't have more probe steps than groups.";
#endif
  }

  // The current, group-aligned probe index.
  auto index() const -> ssize_t { return p_; }

 private:
  // Cumulative step, pre-scaled by `GroupSize`.
  ssize_t step_ = 0;
  // Wraps indices to the table size and aligns them to group starts.
  size_t mask_;
  // The current probe index.
  ssize_t p_;
#ifndef NDEBUG
  // Debug-only copies of the starting index and table size, used to
  // cross-check the incremental computation against the closed formula.
  ssize_t start_;
  ssize_t size_;
#endif
};
// TODO: Evaluate keeping this outlined to see if macro benchmarks observe the
// same perf hit as micro benchmarks.
//
// Returns the entry for `lookup_key`, or null if it isn't present.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::LookupEntry(
    LookupKeyT lookup_key, KeyContextT key_context) const -> EntryT* {
  // Prefetch with a "low" temporal locality as we're primarily expecting a
  // brief use of the storage and then to return to application code.
  __builtin_prefetch(storage_, /*read*/ 0, /*low-locality*/ 1);

  ssize_t local_size = alloc_size_;
  CARBON_DCHECK(local_size > 0);

  uint8_t* local_metadata = metadata();
  HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
  // The 7-bit tag is matched against metadata bytes; the index seeds the
  // probe sequence.
  auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();

  EntryT* local_entries = entries();

  // Walk through groups of entries using a quadratic probe starting from
  // `hash_index`.
  ProbeSequence s(hash_index, local_size);
  do {
    ssize_t group_index = s.index();

    // For each group, match the tag against the metadata to extract the
    // potentially matching entries within the group.
    MetadataGroup g = MetadataGroup::Load(local_metadata, group_index);
    auto metadata_matched_range = g.Match(tag);
    if (LLVM_LIKELY(metadata_matched_range)) {
      // If any entries in this group potentially match based on their metadata,
      // walk each candidate and compare its key to see if we have definitively
      // found a match.
      EntryT* group_entries = &local_entries[group_index];
      auto byte_it = metadata_matched_range.begin();
      auto byte_end = metadata_matched_range.end();
      do {
        EntryT* entry = byte_it.index_ptr(group_entries);
        if (LLVM_LIKELY(key_context.KeyEq(lookup_key, entry->key()))) {
          __builtin_assume(entry != nullptr);
          return entry;
        }
        ++byte_it;
      } while (LLVM_UNLIKELY(byte_it != byte_end));
    }

    // We failed to find a matching entry in this bucket, so check if there are
    // empty slots as that indicates we're done probing -- no later probed index
    // could have a match.
    auto empty_byte_matched_range = g.MatchEmpty();
    if (LLVM_LIKELY(empty_byte_matched_range)) {
      return nullptr;
    }

    s.Next();

    // We use a weird construct of an "unlikely" condition of `true`. The goal
    // is to get the compiler to not prioritize the back edge of the loop for
    // code layout, and in at least some tests this seems to be an effective
    // construct for achieving this.
  } while (LLVM_UNLIKELY(true));
}
  662. // Note that we force inlining here because we expect to be called with lambdas
  663. // that will in turn be inlined to form the loop body. We don't want function
  664. // boundaries within the loop for performance, and recognizing the degree of
  665. // simplification from inlining these callbacks may be difficult to
  666. // automatically recognize.
  667. template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
  668. template <typename EntryCallbackT, typename GroupCallbackT>
  669. [[clang::always_inline]] auto
  670. ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::ForEachEntry(
  671. EntryCallbackT entry_callback, GroupCallbackT group_callback) const
  672. -> void {
  673. uint8_t* local_metadata = metadata();
  674. EntryT* local_entries = entries();
  675. ssize_t local_size = alloc_size_;
  676. for (ssize_t group_index = 0; group_index < local_size;
  677. group_index += GroupSize) {
  678. auto g = MetadataGroup::Load(local_metadata, group_index);
  679. auto present_matched_range = g.MatchPresent();
  680. if (!present_matched_range) {
  681. continue;
  682. }
  683. for (ssize_t byte_index : present_matched_range) {
  684. entry_callback(local_entries[group_index + byte_index]);
  685. }
  686. group_callback(&local_metadata[group_index]);
  687. }
  688. }
// Computes diagnostic metrics for the table: entry and tombstone counts,
// storage bytes, and statistics on how far present entries had to probe and
// how many failing key comparisons a lookup of each would perform.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto ViewImpl<InputKeyT, InputValueT, InputKeyContextT>::ComputeMetricsImpl(
    KeyContextT key_context) const -> Metrics {
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();
  ssize_t local_size = alloc_size_;

  Metrics metrics;

  // Compute the ones we can directly.
  metrics.deleted_count = llvm::count(
      llvm::ArrayRef(local_metadata, local_size), MetadataGroup::Deleted);
  metrics.storage_bytes = AllocByteSize(local_size);

  // We want to process present slots specially to collect metrics on their
  // probing behavior.
  for (ssize_t group_index = 0; group_index < local_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(local_metadata, group_index);
    auto present_matched_range = g.MatchPresent();
    for (ssize_t byte_index : present_matched_range) {
      ++metrics.key_count;
      ssize_t index = group_index + byte_index;
      HashCode hash =
          key_context.HashKey(local_entries[index].key(), ComputeSeed());
      auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();
      ProbeSequence s(hash_index, local_size);
      // An entry counts as "probed" when it doesn't reside in the group its
      // hash index points at.
      metrics.probed_key_count +=
          static_cast<ssize_t>(s.index() != group_index);

      // For each probed key, go through the probe sequence to find both the
      // probe distance and how many comparisons are required.
      ssize_t distance = 0;
      ssize_t compares = 0;
      for (; s.index() != group_index; s.Next()) {
        auto probe_g = MetadataGroup::Load(local_metadata, s.index());
        auto probe_matched_range = probe_g.Match(tag);
        compares += std::distance(probe_matched_range.begin(),
                                  probe_matched_range.end());
        distance += 1;
      }
      // In the entry's own group, only the tag matches *before* its slot
      // would be compared on the way to finding it.
      auto probe_g = MetadataGroup::Load(local_metadata, s.index());
      auto probe_matched_range = probe_g.Match(tag);
      CARBON_CHECK(!probe_matched_range.empty());
      for (ssize_t match_index : probe_matched_range) {
        if (match_index >= byte_index) {
          // Note we only count the compares that will *fail* as part of
          // probing. The last successful compare isn't interesting, it is
          // always needed.
          break;
        }
        compares += 1;
      }

      // Accumulate sums into the `avg` fields here; they are divided into
      // averages below.
      metrics.probe_avg_distance += distance;
      metrics.probe_max_distance =
          std::max(metrics.probe_max_distance, distance);
      metrics.probe_avg_compares += compares;
      metrics.probe_max_compares =
          std::max(metrics.probe_max_compares, compares);
    }
  }
  if (metrics.key_count > 0) {
    metrics.probe_avg_compares /= metrics.key_count;
    metrics.probe_avg_distance /= metrics.key_count;
  }

  return metrics;
}
// Tears down the table: destroys entries and releases any heap storage via
// `Destroy`.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::~BaseImpl() {
  Destroy();
}
// TODO: Evaluate whether it is worth forcing this out-of-line given the
// reasonable ABI boundary it forms and large volume of code necessary to
// implement it.
//
// Looks up `lookup_key` and, when absent, claims a slot for it. Returns the
// entry and `true` when the caller must construct the key/value into the
// slot; returns the existing entry and `false` when the key is already
// present.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::InsertImpl(
    LookupKeyT lookup_key, KeyContextT key_context)
    -> std::pair<EntryT*, bool> {
  CARBON_DCHECK(alloc_size() > 0);

  uint8_t* local_metadata = metadata();

  HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
  auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();

  // Track the first deleted (tombstone) slot seen while probing so that, if
  // the key turns out to be absent, it can be reused instead of consuming an
  // empty slot. `group_with_deleted_index` is only meaningful once
  // `deleted_match` is non-empty.
  ssize_t group_with_deleted_index;
  MetadataGroup::MatchIndex deleted_match = {};

  EntryT* local_entries = entries();

  // Claims `index` for insertion: tags its metadata byte as present and
  // returns the entry with `true` to request construction.
  auto return_insert_at_index = [&](ssize_t index) -> std::pair<EntryT*, bool> {
    // We'll need to insert at this index so set the control group byte to the
    // proper value.
    local_metadata[index] = tag | MetadataGroup::PresentMask;
    return {&local_entries[index], true};
  };

  for (ProbeSequence s(hash_index, alloc_size());; s.Next()) {
    ssize_t group_index = s.index();
    auto g = MetadataGroup::Load(local_metadata, group_index);

    // Compare the key against every metadata-tag candidate in the group; a
    // key match means the entry already exists.
    auto control_byte_matched_range = g.Match(tag);
    if (control_byte_matched_range) {
      EntryT* group_entries = &local_entries[group_index];
      auto byte_it = control_byte_matched_range.begin();
      auto byte_end = control_byte_matched_range.end();
      do {
        EntryT* entry = byte_it.index_ptr(group_entries);
        if (LLVM_LIKELY(key_context.KeyEq(lookup_key, entry->key()))) {
          return {entry, false};
        }
        ++byte_it;
      } while (LLVM_UNLIKELY(byte_it != byte_end));
    }

    // Track the first group with a deleted entry that we could insert over.
    if (!deleted_match) {
      deleted_match = g.MatchDeleted();
      group_with_deleted_index = group_index;
    }

    // We failed to find a matching entry in this bucket, so check if there are
    // no empty slots. In that case, we'll continue probing.
    auto empty_match = g.MatchEmpty();
    if (!empty_match) {
      continue;
    }
    // Ok, we've finished probing without finding anything and need to insert
    // instead.

    // If we found a deleted slot, we don't need the probe sequence to insert
    // so just bail. We want to ensure building up a table is fast so we
    // de-prioritize this a bit. In practice this doesn't have too much of an
    // effect.
    if (LLVM_UNLIKELY(deleted_match)) {
      return return_insert_at_index(group_with_deleted_index +
                                    deleted_match.index());
    }

    // We're going to need to grow by inserting into an empty slot. Check that
    // we have the budget for that before we compute the exact index of the
    // empty slot. Without the growth budget we'll have to completely rehash and
    // so we can just bail here.
    if (LLVM_UNLIKELY(growth_budget_ == 0)) {
      return {GrowAndInsert(lookup_key, key_context), true};
    }

    --growth_budget_;
    CARBON_DCHECK(growth_budget() >= 0)
        << "Growth budget shouldn't have gone negative!";
    return return_insert_at_index(group_index + empty_match.index());
  }

  CARBON_FATAL() << "We should never finish probing without finding the entry "
                    "or an empty slot.";
}
// Grows the table to `target_alloc_size`, which must be a power of two. A
// target at or below the current size is a no-op; the table never shrinks
// here. All present entries are re-inserted into the new storage.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
[[clang::noinline]] auto
BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::GrowToAllocSizeImpl(
    ssize_t target_alloc_size, KeyContextT key_context) -> void {
  CARBON_CHECK(llvm::isPowerOf2_64(target_alloc_size));
  if (target_alloc_size <= alloc_size()) {
    return;
  }

  // If this is the next alloc size, we can use our optimized growth strategy.
  if (target_alloc_size == ComputeNextAllocSize(alloc_size())) {
    GrowToNextAllocSize(key_context);
    return;
  }

  // Create locals for the old state of the table.
  ssize_t old_size = alloc_size();
  CARBON_DCHECK(old_size > 0);
  bool old_small = is_small();
  Storage* old_storage = storage();
  uint8_t* old_metadata = metadata();
  EntryT* old_entries = entries();

  // Configure for the new size and allocate the new storage.
  alloc_size() = target_alloc_size;
  storage() = Allocate(target_alloc_size);
  std::memset(metadata(), 0, target_alloc_size);
  growth_budget_ = GrowthThresholdForAllocSize(target_alloc_size);

  // Just re-insert all the entries. As we're more than doubling the table size,
  // we don't bother with fancy optimizations here. Even using `memcpy` for the
  // entries seems unlikely to be a significant win given how sparse the
  // insertions will end up being.
  ssize_t count = 0;
  for (ssize_t group_index = 0; group_index < old_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(old_metadata, group_index);
    auto present_matched_range = g.MatchPresent();
    for (ssize_t byte_index : present_matched_range) {
      ++count;
      ssize_t index = group_index + byte_index;
      EntryT* new_entry =
          InsertIntoEmpty(old_entries[index].key(), key_context);
      new_entry->MoveFrom(std::move(old_entries[index]));
    }
  }
  // Each re-inserted entry consumed one unit of the fresh growth budget.
  growth_budget_ -= count;

  if (!old_small) {
    // Old isn't a small buffer, so we need to deallocate it.
    Deallocate(old_storage, old_size);
  }
}
  880. template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
  881. auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::GrowForInsertCountImpl(
  882. ssize_t count, KeyContextT key_context) -> void {
  883. if (count < growth_budget_) {
  884. // Already space for the needed growth.
  885. return;
  886. }
  887. // Currently, we don't account for any tombstones marking deleted elements,
  888. // and just conservatively ensure the growth will create adequate growth
  889. // budget for insertions. We could make this more precise by instead walking
  890. // the table and only counting present slots, as once we grow we'll be able to
  891. // reclaim all of the deleted slots. But this adds complexity and it isn't
  892. // clear this is necessary so we do the simpler conservative thing.
  893. ssize_t used_budget =
  894. GrowthThresholdForAllocSize(alloc_size()) - growth_budget_;
  895. ssize_t budget_needed = used_budget + count;
  896. ssize_t space_needed = budget_needed + (budget_needed / 7);
  897. ssize_t target_alloc_size = llvm::NextPowerOf2(space_needed);
  898. CARBON_CHECK(GrowthThresholdForAllocSize(target_alloc_size) >
  899. (budget_needed));
  900. GrowToAllocSizeImpl(target_alloc_size, key_context);
  901. }
// Erases the entry for `lookup_key` when present, returning whether an entry
// was erased.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
template <typename LookupKeyT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::EraseImpl(
    LookupKeyT lookup_key, KeyContextT key_context) -> bool {
  EntryT* entry = view_impl_.LookupEntry(lookup_key, key_context);
  if (!entry) {
    return false;
  }

  // If there are empty slots in this group then nothing will probe past this
  // group looking for an entry so we can simply set this slot to empty as
  // well. However, if every slot in this group is full, it might be part of
  // a long probe chain that we can't disrupt. In that case we mark the slot's
  // metadata as deleted to keep probes continuing past it.
  //
  // If we mark the slot as empty, we'll also need to increase the growth
  // budget.
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();
  ssize_t index = entry - local_entries;
  // Round the entry's index down to the start of its group.
  ssize_t group_index = index & ~GroupMask;
  auto g = MetadataGroup::Load(local_metadata, group_index);
  auto empty_matched_range = g.MatchEmpty();
  if (empty_matched_range) {
    local_metadata[index] = MetadataGroup::Empty;
    ++growth_budget_;
  } else {
    local_metadata[index] = MetadataGroup::Deleted;
  }

  if constexpr (!EntryT::IsTriviallyDestructible) {
    entry->Destroy();
  }

  return true;
}
  935. template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
  936. auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::ClearImpl() -> void {
  937. view_impl_.ForEachEntry(
  938. [](EntryT& entry) {
  939. if constexpr (!EntryT::IsTriviallyDestructible) {
  940. entry.Destroy();
  941. }
  942. },
  943. [](uint8_t* metadata_group) {
  944. // Clear the group.
  945. std::memset(metadata_group, 0, GroupSize);
  946. });
  947. growth_budget_ = GrowthThresholdForAllocSize(alloc_size());
  948. }
// Allocates the appropriate memory layout for a table of the given
// `alloc_size`, with space both for the metadata array and entries.
//
// The returned pointer *must* be deallocated by calling the below `Deallocate`
// function with the same `alloc_size` as used here.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Allocate(
    ssize_t alloc_size) -> Storage* {
  // Uses the aligned, nothrow form of `operator new`, which returns null on
  // failure rather than throwing. NOTE(review): callers here don't appear to
  // null-check the result -- confirm allocation failure is acceptable to
  // treat as fatal.
  return reinterpret_cast<Storage*>(__builtin_operator_new(
      ViewImplT::AllocByteSize(alloc_size),
      static_cast<std::align_val_t>(Alignment), std::nothrow_t()));
}
// Deallocates a table's storage that was allocated with the `Allocate`
// function.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Deallocate(
    Storage* storage, ssize_t alloc_size) -> void {
  ssize_t allocated_size = ViewImplT::AllocByteSize(alloc_size);
  // We don't need the size, but make sure it always compiles.
  static_cast<void>(allocated_size);
  // Pass the size only when sized deallocation is available; the alignment
  // must match the one used in `Allocate`.
  __builtin_operator_delete(storage,
#if __cpp_sized_deallocation
                            allocated_size,
#endif
                            static_cast<std::align_val_t>(Alignment));
}
// Construct a table using the provided small storage if `small_alloc_size_` is
// non-zero. If `small_alloc_size_` is zero, then `small_storage` won't be used
// and can be null. Regardless, after this the storage pointer is non-null and
// the size is non-zero so that we can directly begin inserting or querying the
// table.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Construct(
    Storage* small_storage) -> void {
  if (small_alloc_size_ > 0) {
    alloc_size() = small_alloc_size_;
    storage() = small_storage;
  } else {
    // Directly allocate the initial buffer so that the hashtable is never in
    // an empty state.
    alloc_size() = MinAllocatedSize;
    storage() = Allocate(MinAllocatedSize);
  }
  // Zeroed metadata marks every slot empty, and a fresh table gets the full
  // growth budget for its allocation size.
  std::memset(metadata(), 0, alloc_size());
  growth_budget_ = GrowthThresholdForAllocSize(alloc_size());
}
// Destroy the current table, releasing any memory used.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::Destroy() -> void {
  // Check for a moved-from state and don't do anything. Only a moved-from table
  // has a zero size.
  if (alloc_size() == 0) {
    return;
  }

  // Destroy all the entries.
  if constexpr (!EntryT::IsTriviallyDestructible) {
    view_impl_.ForEachEntry([](EntryT& entry) { entry.Destroy(); },
                            [](auto...) {});
  }

  // If small, nothing to deallocate.
  if (is_small()) {
    return;
  }

  // Just deallocate the storage without updating anything when destroying the
  // object.
  Deallocate(storage(), alloc_size());
}
// Copy all of the slots over from another table that is exactly the same
// allocation size.
//
// This requires the current table to already have storage allocated and set up
// but not initialized (or already cleared). It directly overwrites the storage
// allocation of the table to match the incoming argument.
//
// Despite being used in construction, this shouldn't be called for a moved-from
// `arg` -- in practice it is better for callers to handle this when setting up
// storage.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::CopySlotsFrom(
    const BaseImpl& arg) -> void {
  CARBON_DCHECK(alloc_size() == arg.alloc_size());
  ssize_t local_size = alloc_size();

  // Preserve which slot every entry is in, including tombstones in the
  // metadata, in order to copy into the new table's storage without rehashing
  // all of the keys. This is especially important as we don't have an easy way
  // to access the key context needed for rehashing here.
  uint8_t* local_metadata = metadata();
  EntryT* local_entries = entries();
  const uint8_t* local_arg_metadata = arg.metadata();
  const EntryT* local_arg_entries = arg.entries();
  memcpy(local_metadata, local_arg_metadata, local_size);
  // Only present entries are live objects to copy; other slots carry no
  // entry data.
  for (ssize_t group_index = 0; group_index < local_size;
       group_index += GroupSize) {
    auto g = MetadataGroup::Load(local_arg_metadata, group_index);
    for (ssize_t byte_index : g.MatchPresent()) {
      local_entries[group_index + byte_index].CopyFrom(
          local_arg_entries[group_index + byte_index]);
    }
  }
}
// Move from another table to this one.
//
// Note that the `small_storage` is *this* table's small storage pointer,
// provided from the `TableImpl` to this `BaseImpl` method as an argument.
//
// Requires the table to have size and growth already set up but otherwise the
// the table has not yet been initialized. Notably, storage should either not
// yet be constructed or already destroyed. It both sets up the storage and
// handles any moving slots needed.
//
// Note that because this is used in construction it needs to handle a
// moved-from `arg`.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::MoveFrom(
    BaseImpl&& arg, Storage* small_storage) -> void {
  ssize_t local_size = alloc_size();
  CARBON_DCHECK(local_size == arg.alloc_size());

  // If `arg` is moved-from, skip the rest as the local size is all we need.
  if (local_size == 0) {
    return;
  }

  if (arg.is_small()) {
    CARBON_DCHECK(local_size == small_alloc_size_);
    this->storage() = small_storage;
    // For small tables, we have to move the entries as we can't move the tables
    // themselves. We do this preserving their slots and even tombstones to
    // avoid rehashing.
    uint8_t* local_metadata = this->metadata();
    EntryT* local_entries = this->entries();
    uint8_t* local_arg_metadata = arg.metadata();
    EntryT* local_arg_entries = arg.entries();
    memcpy(local_metadata, local_arg_metadata, local_size);
    if (EntryT::IsTriviallyRelocatable) {
      // A single bulk copy relocates every slot, present or not.
      memcpy(local_entries, local_arg_entries, local_size * sizeof(EntryT));
    } else {
      // Otherwise, move only the present entries individually.
      for (ssize_t group_index = 0; group_index < local_size;
           group_index += GroupSize) {
        auto g = MetadataGroup::Load(local_arg_metadata, group_index);
        for (ssize_t byte_index : g.MatchPresent()) {
          local_entries[group_index + byte_index].MoveFrom(
              std::move(local_arg_entries[group_index + byte_index]));
        }
      }
    }
  } else {
    // Just point to the allocated storage.
    storage() = arg.storage();
  }

  // Finally, put the incoming table into a moved-from state.
  arg.alloc_size() = 0;
  // Replace the pointer with null to ease debugging.
  arg.storage() = nullptr;
}
  1102. // Optimized routine to insert a key into a table when that key *definitely*
  1103. // isn't present in the table and the table *definitely* has a viable empty slot
  1104. // (and growth space) to insert into before any deleted slots. When both of
  1105. // these are true, typically just after growth, we can dramatically simplify the
  1106. // insert position search.
  1107. template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
  1108. template <typename LookupKeyT>
  1109. [[clang::noinline]] auto
  1110. BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::InsertIntoEmpty(
  1111. LookupKeyT lookup_key, KeyContextT key_context) -> EntryT* {
  1112. HashCode hash = key_context.HashKey(lookup_key, ComputeSeed());
  1113. auto [hash_index, tag] = hash.ExtractIndexAndTag<7>();
  1114. uint8_t* local_metadata = metadata();
  1115. EntryT* local_entries = entries();
  1116. for (ProbeSequence s(hash_index, alloc_size());; s.Next()) {
  1117. ssize_t group_index = s.index();
  1118. auto g = MetadataGroup::Load(local_metadata, group_index);
  1119. if (auto empty_match = g.MatchEmpty()) {
  1120. ssize_t index = group_index + empty_match.index();
  1121. local_metadata[index] = tag | MetadataGroup::PresentMask;
  1122. return &local_entries[index];
  1123. }
  1124. // Otherwise we continue probing.
  1125. }
  1126. }
  1127. // Apply our doubling growth strategy and (re-)check invariants around table
  1128. // size.
  1129. template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
  1130. auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::ComputeNextAllocSize(
  1131. ssize_t old_alloc_size) -> ssize_t {
  1132. CARBON_DCHECK(llvm::isPowerOf2_64(old_alloc_size))
  1133. << "Expected a power of two!";
  1134. ssize_t new_alloc_size;
  1135. bool overflow = __builtin_mul_overflow(old_alloc_size, 2, &new_alloc_size);
  1136. CARBON_CHECK(!overflow) << "Computing the new size overflowed `ssize_t`!";
  1137. return new_alloc_size;
  1138. }
// Compute the growth threshold for a given size.
//
// This is the maximum number of present slots the table may hold at
// `alloc_size` before an insert must grow the table.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT,
              InputKeyContextT>::GrowthThresholdForAllocSize(ssize_t alloc_size)
    -> ssize_t {
  // We use a 7/8ths load factor to trigger growth.
  return alloc_size - alloc_size / 8;
}
// Optimized routine for growing to the next alloc size.
//
// A particularly common and important-to-optimize path is growing to the next
// alloc size, which will always be a doubling of the allocated size. This
// allows an important optimization -- we're adding exactly one more high bit to
// the hash-computed index for each entry. This in turn means we can classify
// every entry in the table into three cases:
//
// 1) The new high bit is zero, the entry is at the same index in the new
//    table as the old.
//
// 2) The new high bit is one, the entry is at the old index plus the old
//    size.
//
// 3) The entry's current index doesn't match the initial hash index because
//    it required some amount of probing to find an empty slot.
//
// The design of the hash table tries to minimize how many entries fall into
// case (3), so we expect the vast majority of entries to be in (1) or (2). This
// lets us model growth notionally as copying the hashtable twice into the lower
// and higher halves of the new allocation, clearing out the now-empty slots
// (from both deleted entries and entries in the other half of the table after
// growth), and inserting any probed elements. That model in turn is much more
// efficient than re-inserting all of the elements as it avoids the unnecessary
// parts of insertion and avoids interleaving random accesses for the probed
// elements. But most importantly, for trivially relocatable types it allows us
// to use `memcpy` rather than moving the elements individually.
template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
auto BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::GrowToNextAllocSize(
    KeyContextT key_context) -> void {
  // We collect the probed elements in a small vector for re-insertion. It is
  // tempting to reuse the already allocated storage, but doing so appears to
  // be a (very slight) performance regression. These are relatively rare and
  // storing them into the existing storage creates stores to the same regions
  // of memory we're reading. Moreover, it requires moving both the key and the
  // value twice, and doing the `memcpy` widening for relocatable types before
  // the group walk rather than after the group walk. In practice, between the
  // statistical rareness and using a large small size buffer here on the stack,
  // we can handle this most efficiently with temporary, additional storage.
  llvm::SmallVector<ssize_t, 128> probed_indices;

  // Create locals for the old state of the table. These must be captured
  // before we overwrite the table's size and storage below.
  ssize_t old_size = alloc_size();
  CARBON_DCHECK(old_size > 0);
  bool old_small = is_small();
  Storage* old_storage = storage();
  uint8_t* old_metadata = metadata();
  EntryT* old_entries = entries();

#ifndef NDEBUG
  // Count how many of the old table slots will end up being empty after we grow
  // the table. This is both the currently empty slots, but also the deleted
  // slots because we clear them to empty and re-insert everything that had any
  // probing.
  ssize_t debug_empty_count =
      llvm::count(llvm::ArrayRef(old_metadata, old_size), MetadataGroup::Empty);
  ssize_t debug_deleted_count = llvm::count(
      llvm::ArrayRef(old_metadata, old_size), MetadataGroup::Deleted);
  CARBON_DCHECK(debug_empty_count >=
                (old_size - GrowthThresholdForAllocSize(old_size)))
      << "debug_empty_count: " << debug_empty_count
      << ", debug_deleted_count: " << debug_deleted_count
      << ", size: " << old_size;
#endif

  // Configure for the new size and allocate the new storage.
  ssize_t new_size = ComputeNextAllocSize(old_size);
  alloc_size() = new_size;
  storage() = Allocate(new_size);
  growth_budget_ = GrowthThresholdForAllocSize(new_size);

  // Now extract the new components of the table.
  uint8_t* new_metadata = metadata();
  EntryT* new_entries = entries();

  // Walk the metadata groups, clearing deleted to empty, duplicating the
  // metadata for the low and high halves, and updating it based on where each
  // entry will go in the new table. The updated metadata group is written to
  // the new table, and for non-trivially relocatable entry types, the entry is
  // also moved to its new location.
  ssize_t count = 0;
  for (ssize_t group_index = 0; group_index < old_size;
       group_index += GroupSize) {
    auto low_g = MetadataGroup::Load(old_metadata, group_index);
    // Make sure to match present elements first to enable pipelining with
    // clearing.
    auto present_matched_range = low_g.MatchPresent();
    low_g.ClearDeleted();
    MetadataGroup high_g;
    if constexpr (MetadataGroup::FastByteClear) {
      // When we have a fast byte clear, we can update the metadata for the
      // growth in-register and store at the end.
      high_g = low_g;
    } else {
      // If we don't have a fast byte clear, we can store the metadata group
      // eagerly here and overwrite bytes with a byte store below instead of
      // clearing the byte in-register.
      low_g.Store(new_metadata, group_index);
      low_g.Store(new_metadata, group_index | old_size);
    }
    for (ssize_t byte_index : present_matched_range) {
      ++count;
      ssize_t old_index = group_index + byte_index;
      if constexpr (!MetadataGroup::FastByteClear) {
        // The eager stores above must have duplicated this entry's metadata
        // byte into both halves of the new table.
        CARBON_DCHECK(new_metadata[old_index] == old_metadata[old_index]);
        CARBON_DCHECK(new_metadata[old_index | old_size] ==
                      old_metadata[old_index]);
      }
      // Re-hash the key to recover the entry's preferred index; we don't
      // store hashes, so this recomputation is the cost of growth.
      HashCode hash =
          key_context.HashKey(old_entries[old_index].key(), ComputeSeed());
      ssize_t old_hash_index = hash.ExtractIndexAndTag<7>().first &
                               ComputeProbeMaskFromSize(old_size);
      if (LLVM_UNLIKELY(old_hash_index != group_index)) {
        // Case (3): this entry had probed away from its hash group. Clear its
        // slot in both halves and defer it for a normal re-insertion below.
        probed_indices.push_back(old_index);
        if constexpr (MetadataGroup::FastByteClear) {
          low_g.ClearByte(byte_index);
          high_g.ClearByte(byte_index);
        } else {
          new_metadata[old_index] = MetadataGroup::Empty;
          new_metadata[old_index | old_size] = MetadataGroup::Empty;
        }
        continue;
      }
      // Cases (1) and (2): the new index is the old hash index with one
      // additional hash bit folded in, selecting the low or high half.
      ssize_t new_index = hash.ExtractIndexAndTag<7>().first &
                          ComputeProbeMaskFromSize(new_size);
      CARBON_DCHECK(new_index == old_hash_index ||
                    new_index == (old_hash_index | old_size));
      // Toggle the newly added bit of the index to get to the other possible
      // target index, and clear the entry's metadata byte there.
      if constexpr (MetadataGroup::FastByteClear) {
        (new_index == old_hash_index ? high_g : low_g).ClearByte(byte_index);
        new_index += byte_index;
      } else {
        new_index += byte_index;
        new_metadata[new_index ^ old_size] = MetadataGroup::Empty;
      }

      // If we need to explicitly move (and destroy) the key or value, do so
      // here where we already know its target.
      if constexpr (!EntryT::IsTriviallyRelocatable) {
        new_entries[new_index].MoveFrom(std::move(old_entries[old_index]));
      }
    }
    if constexpr (MetadataGroup::FastByteClear) {
      // Flush the in-register metadata updates for both halves.
      low_g.Store(new_metadata, group_index);
      high_g.Store(new_metadata, (group_index | old_size));
    }
  }
  // Every present entry except the probed ones must now occupy a non-empty
  // slot in the new metadata.
  CARBON_DCHECK((count - static_cast<ssize_t>(probed_indices.size())) ==
                (new_size - llvm::count(llvm::ArrayRef(new_metadata, new_size),
                                        MetadataGroup::Empty)));
#ifndef NDEBUG
  CARBON_DCHECK((debug_empty_count + debug_deleted_count) ==
                (old_size - count));
  CARBON_DCHECK(llvm::count(llvm::ArrayRef(new_metadata, new_size),
                            MetadataGroup::Empty) ==
                debug_empty_count + debug_deleted_count +
                    static_cast<ssize_t>(probed_indices.size()) + old_size);
#endif

  // If the keys or values are trivially relocatable, we do a bulk memcpy of
  // them into place. This will copy them into both possible locations, which is
  // fine. One will be empty and clobbered if reused or ignored. The other will
  // be the one used. This might seem like it needs it to be valid for us to
  // create two copies, but it doesn't. This produces the exact same storage as
  // copying the storage into the wrong location first, and then again into the
  // correct location. Only one is live and only one is destroyed.
  if constexpr (EntryT::IsTriviallyRelocatable) {
    memcpy(new_entries, old_entries, old_size * sizeof(EntryT));
    memcpy(new_entries + old_size, old_entries, old_size * sizeof(EntryT));
  }

  // We then need to do a normal insertion for anything that was probed before
  // growth, but we know we'll find an empty slot, so leverage that.
  for (ssize_t old_index : probed_indices) {
    EntryT* new_entry =
        InsertIntoEmpty(old_entries[old_index].key(), key_context);
    new_entry->MoveFrom(std::move(old_entries[old_index]));
  }
  CARBON_DCHECK(count ==
                (new_size - llvm::count(llvm::ArrayRef(new_metadata, new_size),
                                        MetadataGroup::Empty)));
  // Charge all the (re-)inserted entries against the fresh growth budget.
  growth_budget_ -= count;
  CARBON_DCHECK(growth_budget_ ==
                (GrowthThresholdForAllocSize(new_size) -
                 (new_size - llvm::count(llvm::ArrayRef(new_metadata, new_size),
                                         MetadataGroup::Empty))));
  // NOTE(review): this DCHECK uses `cond && "message"` while the rest of the
  // file streams messages with `<<` -- presumably equivalent in effect here;
  // consider unifying the style.
  CARBON_DCHECK(growth_budget_ > 0 &&
                "Must still have a growth budget after rehash!");

  if (!old_small) {
    // Old isn't a small buffer, so we need to deallocate it.
    Deallocate(old_storage, old_size);
  }
}
  1333. // Grow the hashtable to create space and then insert into it. Returns the
  1334. // selected insertion entry. Never returns null. In addition to growing and
  1335. // selecting the insertion entry, this routine updates the metadata array so
  1336. // that this function can be directly called and the result returned from
  1337. // `InsertImpl`.
  1338. template <typename InputKeyT, typename InputValueT, typename InputKeyContextT>
  1339. template <typename LookupKeyT>
  1340. [[clang::noinline]] auto
  1341. BaseImpl<InputKeyT, InputValueT, InputKeyContextT>::GrowAndInsert(
  1342. LookupKeyT lookup_key, KeyContextT key_context) -> EntryT* {
  1343. GrowToNextAllocSize(key_context);
  1344. // And insert the lookup_key into an index in the newly grown map and return
  1345. // that index for use.
  1346. --growth_budget_;
  1347. return InsertIntoEmpty(lookup_key, key_context);
  1348. }
// Copy construction: replicates `arg`'s size and growth budget via the base,
// then sets up matching storage and copies the slots over.
template <typename InputBaseT, ssize_t SmallSize>
TableImpl<InputBaseT, SmallSize>::TableImpl(const TableImpl& arg)
    : BaseT(arg.alloc_size(), arg.growth_budget_, SmallSize) {
  // Check for completely broken objects. These invariants should be true even
  // in a moved-from state.
  CARBON_DCHECK(arg.alloc_size() == 0 || !arg.is_small() ||
                arg.alloc_size() == SmallSize);
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  CARBON_DCHECK(this->small_alloc_size_ == SmallSize);

  // A zero size means `arg` is moved-from; in that case we stay moved-from
  // too and there is no storage to create or copy.
  if (this->alloc_size() != 0) {
    SetUpStorage();
    this->CopySlotsFrom(arg);
  }
}
// Copy assignment: reuses the existing storage when the allocation sizes
// match, otherwise tears the table down and rebuilds it at `arg`'s size before
// copying the slots.
template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::operator=(const TableImpl& arg)
    -> TableImpl& {
  // Check for completely broken objects. These invariants should be true even
  // in a moved-from state.
  CARBON_DCHECK(arg.alloc_size() == 0 || !arg.is_small() ||
                arg.alloc_size() == SmallSize);
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  CARBON_DCHECK(this->small_alloc_size_ == SmallSize);

  // We have to end up with an allocation size exactly equivalent to the
  // incoming argument to avoid re-hashing every entry in the table, which isn't
  // possible without key context.
  if (arg.alloc_size() == this->alloc_size()) {
    // No effective way for self-assignment to fall out of an efficient
    // implementation so detect and bypass here. Similarly, if both are in a
    // moved-from state, there is nothing to do.
    if (&arg == this || this->alloc_size() == 0) {
      return *this;
    }
    CARBON_DCHECK(arg.storage() != this->storage());
    // Sizes match, so keep the storage and only destroy the current entries
    // (when destruction isn't a no-op) before the copy below overwrites them.
    if constexpr (!EntryT::IsTriviallyDestructible) {
      this->view_impl_.ForEachEntry([](EntryT& entry) { entry.Destroy(); },
                                    [](auto...) {});
    }
  } else {
    // The sizes don't match so destroy everything and re-setup the table
    // storage.
    this->Destroy();
    this->alloc_size() = arg.alloc_size();

    // If `arg` is moved-from, we've cleared out our elements and put ourselves
    // into a moved-from state. We're done.
    if (this->alloc_size() == 0) {
      return *this;
    }

    SetUpStorage();
  }
  // Both paths converge here with storage sized to match `arg`.
  this->growth_budget_ = arg.growth_budget_;
  this->CopySlotsFrom(arg);
  return *this;
}
// Puts the incoming table into a moved-from state that can be destroyed or
// re-initialized but must not be used otherwise.
template <typename InputBaseT, ssize_t SmallSize>
TableImpl<InputBaseT, SmallSize>::TableImpl(TableImpl&& arg) noexcept
    : BaseT(arg.alloc_size(), arg.growth_budget_, SmallSize) {
  // Check for completely broken objects. These invariants should be true even
  // in a moved-from state.
  CARBON_DCHECK(arg.alloc_size() == 0 || !arg.is_small() ||
                arg.alloc_size() == SmallSize);
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  CARBON_DCHECK(this->small_alloc_size_ == SmallSize);

  // Delegate the storage/entry transfer to the base, offering our small
  // buffer (if any) as a possible destination.
  this->MoveFrom(std::move(arg), small_storage());
}
// Move assignment: destroys this table's current contents and then transfers
// `arg`'s contents, leaving `arg` in a moved-from state. Safe under self-move
// (the result is a moved-from table).
template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::operator=(TableImpl&& arg) noexcept
    -> TableImpl& {
  // Check for completely broken objects. These invariants should be true even
  // in a moved-from state.
  CARBON_DCHECK(arg.alloc_size() == 0 || !arg.is_small() ||
                arg.alloc_size() == SmallSize);
  CARBON_DCHECK(arg.small_alloc_size_ == SmallSize);
  CARBON_DCHECK(this->small_alloc_size_ == SmallSize);

  // Destroy and deallocate our table.
  this->Destroy();

  // Defend against self-move by zeroing the size here before we start moving
  // out of `arg`: if `&arg == this`, the read of `arg.alloc_size()` below
  // observes this zero and we simply end up moved-from.
  this->alloc_size() = 0;

  // Setup to match argument and then finish the move.
  this->alloc_size() = arg.alloc_size();
  this->growth_budget_ = arg.growth_budget_;
  this->MoveFrom(std::move(arg), small_storage());
  return *this;
}
// Reset a table to its original state, including releasing any allocated
// memory.
template <typename InputBaseT, ssize_t SmallSize>
auto TableImpl<InputBaseT, SmallSize>::ResetImpl() -> void {
  // Tear down all entries and any heap allocation first.
  this->Destroy();

  // Re-initialize the whole thing, pointing construction back at the small
  // buffer (which may be null when `SmallSize` is zero).
  CARBON_DCHECK(this->small_alloc_size() == SmallSize);
  this->Construct(small_storage());
}
  1445. template <typename InputBaseT, ssize_t SmallSize>
  1446. auto TableImpl<InputBaseT, SmallSize>::small_storage() const -> Storage* {
  1447. if constexpr (SmallSize > 0) {
  1448. // Do a bunch of validation of the small size to establish our invariants
  1449. // when we know we have a non-zero small size.
  1450. static_assert(llvm::isPowerOf2_64(SmallSize),
  1451. "SmallSize must be a power of two for a hashed buffer!");
  1452. static_assert(
  1453. SmallSize >= MaxGroupSize,
  1454. "We require all small sizes to multiples of the largest group "
  1455. "size supported to ensure it can be used portably. ");
  1456. static_assert(
  1457. (SmallSize % MaxGroupSize) == 0,
  1458. "Small size must be a multiple of the max group size supported "
  1459. "so that we can allocate a whole number of groups.");
  1460. // Implied by the max asserts above.
  1461. static_assert(SmallSize >= GroupSize);
  1462. static_assert((SmallSize % GroupSize) == 0);
  1463. static_assert(SmallSize >= alignof(StorageEntry<KeyT, ValueT>),
  1464. "Requested a small size that would require padding between "
  1465. "metadata bytes and correctly aligned key and value types. "
  1466. "Either a larger small size or a zero small size and heap "
  1467. "allocation are required for this key and value type.");
  1468. static_assert(offsetof(SmallStorage, entries) == SmallSize,
  1469. "Offset to entries in small size storage doesn't match "
  1470. "computed offset!");
  1471. return &small_storage_;
  1472. } else {
  1473. static_assert(
  1474. sizeof(TableImpl) == sizeof(BaseT),
  1475. "Empty small storage caused a size difference and wasted space!");
  1476. return nullptr;
  1477. }
  1478. }
  1479. // Helper to set up the storage of a table when a specific size has already been
  1480. // set up. If possible, uses any small storage, otherwise allocates.
  1481. template <typename InputBaseT, ssize_t SmallSize>
  1482. auto TableImpl<InputBaseT, SmallSize>::SetUpStorage() -> void {
  1483. CARBON_DCHECK(this->small_alloc_size() == SmallSize);
  1484. ssize_t local_size = this->alloc_size();
  1485. CARBON_DCHECK(local_size != 0);
  1486. if (local_size == SmallSize) {
  1487. this->storage() = small_storage();
  1488. } else {
  1489. this->storage() = BaseT::Allocate(local_size);
  1490. }
  1491. }
  1492. } // namespace Carbon::RawHashtable
  1493. #endif // CARBON_COMMON_RAW_HASHTABLE_H_