set.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_COMMON_SET_H_
  5. #define CARBON_COMMON_SET_H_
  6. #include <concepts>
  7. #include <type_traits>
  8. #include "common/check.h"
  9. #include "common/hashtable_key_context.h"
  10. #include "common/raw_hashtable.h"
  11. #include "llvm/Support/Compiler.h"
  12. namespace Carbon {
  13. // Forward declarations to resolve cyclic references.
  14. template <typename KeyT, typename KeyContextT>
  15. class SetView;
  16. template <typename KeyT, typename KeyContextT>
  17. class SetBase;
  18. template <typename KeyT, ssize_t SmallSize, typename KeyContextT>
  19. class Set;
  20. // A read-only view type for a set of keys.
  21. //
  22. // This view is a cheap-to-copy type that should be passed by value, but
  23. // provides view or read-only reference semantics to the underlying set data
  24. // structure.
  25. //
  26. // This should always be preferred to a `const`-ref parameter for the `SetBase`
  27. // or `Set` type as it provides more flexibility and a cleaner API. By default
  28. // a `SetView` provides no more immutability than a `const Set`: elements
  29. // can't be added or removed, but they can be mutated, and the user is
  30. // responsible for avoiding mutations that affect the hash value or equality
  31. // comparison. However, a `SetView<T>` can be converted to a `SetView<const T>`,
  32. // which prevents mutating the elements. As with any other view type, `const`
  33. // on the `SetView` itself is "shallow": it prevents rebinding the `SetView` to
  34. // a different underlying set, but doesn't affect mutability of the underlying
  35. // set.
  36. //
  37. // A specific `KeyContextT` type can optionally be provided to configure how
  38. // keys will be hashed and compared. The default is `DefaultKeyContext` which is
  39. // stateless and will hash using `Carbon::HashValue` and compare using
  40. // `operator==`. Every method accepting a lookup key or operating on the keys in
  41. // the table will also accept an instance of this type. For stateless context
  42. // types, including the default, an instance will be default constructed if not
  43. // provided to these methods. However, stateful contexts should be constructed
  44. // and passed in explicitly. The context type should be small and reasonable to
  45. // pass by value, often a wrapper or pointer to the relevant context needed for
  46. // hashing and comparing keys. For more details about the key context, see
  47. // `hashtable_key_context.h`.
  48. template <typename InputKeyT, typename InputKeyContextT = DefaultKeyContext>
  49. class SetView : RawHashtable::ViewImpl<InputKeyT, void, InputKeyContextT> {
  50. using ImplT = RawHashtable::ViewImpl<InputKeyT, void, InputKeyContextT>;
  51. public:
  52. using KeyT = typename ImplT::KeyT;
  53. using KeyContextT = typename ImplT::KeyContextT;
  54. using MetricsT = typename ImplT::MetricsT;
  55. // This type represents the result of lookup operations. It encodes whether
  56. // the lookup was a success as well as accessors for the key.
  57. class LookupResult {
  58. public:
  59. LookupResult() = default;
  60. explicit LookupResult(KeyT& key) : key_(&key) {}
  61. explicit operator bool() const { return key_ != nullptr; }
  62. auto key() const -> KeyT& { return *key_; }
  63. private:
  64. KeyT* key_ = nullptr;
  65. };
  66. // Enable implicit conversions that add `const`-ness to the key type.
  67. explicit(false)
  68. SetView(const SetView<std::remove_const_t<KeyT>, KeyContextT>& other_view)
  69. requires(!std::same_as<KeyT, std::remove_const_t<KeyT>>)
  70. : ImplT(other_view) {}
  71. // Tests whether a key is present in the set.
  72. template <typename LookupKeyT>
  73. auto Contains(LookupKeyT lookup_key,
  74. KeyContextT key_context = KeyContextT()) const -> bool;
  75. // Lookup a key in the set.
  76. template <typename LookupKeyT>
  77. auto Lookup(LookupKeyT lookup_key,
  78. KeyContextT key_context = KeyContextT()) const -> LookupResult;
  79. // Run the provided callback for every key in the set.
  80. template <typename CallbackT>
  81. auto ForEach(CallbackT callback) const -> void
  82. requires(std::invocable<CallbackT, KeyT&>);
  83. // This routine is relatively inefficient and only intended for use in
  84. // benchmarking or logging of performance anomalies. The specific metrics
  85. // returned have no specific guarantees beyond being informative in
  86. // benchmarks.
  87. auto ComputeMetrics(KeyContextT key_context = KeyContextT()) -> MetricsT {
  88. return ImplT::ComputeMetricsImpl(key_context);
  89. }
  90. private:
  91. template <typename SetKeyT, ssize_t SmallSize, typename KeyContextT>
  92. friend class Set;
  93. friend class SetBase<KeyT, KeyContextT>;
  94. friend class SetView<const KeyT, KeyContextT>;
  95. using EntryT = typename ImplT::EntryT;
  96. SetView() = default;
  97. explicit(false) SetView(ImplT base) : ImplT(base) {}
  98. SetView(ssize_t size, RawHashtable::Storage* storage)
  99. : ImplT(size, storage) {}
  100. };
  101. // A base class for a `Set` type that remains mutable while type-erasing the
  102. // `SmallSize` (SSO) template parameter.
  103. //
  104. // Note that `SetBase` has "shallow" const semantics: a `const SetBase<T>&`
  105. // can't be used to mutate the set data structure itself (e.g. by changing
  106. // the number of elements), but it can be used to mutate the `T` elements it
  107. // contains. The user is responsible for avoiding mutations that would change
  108. // the hash value or equality of an element. A `SetView<const T>` can be used
  109. // to provide read-only access to the elements of a `SetBase<T>`.
  110. //
  111. // A pointer or reference to this type is the preferred way to pass a mutable
  112. // handle to a `Set` type across API boundaries as it avoids encoding specific
  113. // SSO sizing information while providing a near-complete mutable API.
  114. template <typename InputKeyT, typename InputKeyContextT>
  115. class SetBase
  116. : protected RawHashtable::BaseImpl<InputKeyT, void, InputKeyContextT> {
  117. protected:
  118. using ImplT = RawHashtable::BaseImpl<InputKeyT, void, InputKeyContextT>;
  119. public:
  120. using KeyT = typename ImplT::KeyT;
  121. using KeyContextT = typename ImplT::KeyContextT;
  122. using ViewT = SetView<KeyT, KeyContextT>;
  123. using LookupResult = typename ViewT::LookupResult;
  124. using MetricsT = typename ImplT::MetricsT;
  125. // The result type for insertion operations both indicates whether an insert
  126. // was needed (as opposed to the key already being in the set), and provides
  127. // access to the key.
  128. class InsertResult {
  129. public:
  130. InsertResult() = default;
  131. explicit InsertResult(bool inserted, KeyT& key)
  132. : key_(&key), inserted_(inserted) {}
  133. auto is_inserted() const -> bool { return inserted_; }
  134. auto key() const -> KeyT& { return *key_; }
  135. private:
  136. KeyT* key_;
  137. bool inserted_;
  138. };
  139. // Implicitly convertible to the relevant view type.
  140. //
  141. // NOLINTNEXTLINE(google-explicit-constructor): Designed to implicitly decay.
  142. explicit(false) operator ViewT() const { return this->view_impl(); }
  143. // We can't chain the above conversion with the conversions on `ViewT` to add
  144. // const, so explicitly support adding const to produce a view here.
  145. //
  146. // NOLINTNEXTLINE(google-explicit-constructor): Designed to implicitly decay.
  147. explicit(false) operator SetView<const KeyT, KeyContextT>() const {
  148. return ViewT(*this);
  149. }
  150. // Convenience forwarder to the view type.
  151. template <typename LookupKeyT>
  152. auto Contains(LookupKeyT lookup_key,
  153. KeyContextT key_context = KeyContextT()) const -> bool {
  154. return ViewT(*this).Contains(lookup_key, key_context);
  155. }
  156. // Convenience forwarder to the view type.
  157. template <typename LookupKeyT>
  158. auto Lookup(LookupKeyT lookup_key,
  159. KeyContextT key_context = KeyContextT()) const -> LookupResult {
  160. return ViewT(*this).Lookup(lookup_key, key_context);
  161. }
  162. // Convenience forwarder to the view type.
  163. template <typename CallbackT>
  164. auto ForEach(CallbackT callback) const -> void
  165. requires(std::invocable<CallbackT, KeyT&>)
  166. {
  167. return ViewT(*this).ForEach(callback);
  168. }
  169. // Convenience forwarder to the view type.
  170. auto ComputeMetrics(KeyContextT key_context = KeyContextT()) const
  171. -> MetricsT {
  172. return ViewT(*this).ComputeMetrics(key_context);
  173. }
  174. // Insert a key into the set. If the key is already present, no insertion is
  175. // performed and that present key is available in the result. Otherwise a new
  176. // key is inserted and constructed from the argument and available in the
  177. // result.
  178. template <typename LookupKeyT>
  179. auto Insert(LookupKeyT lookup_key, KeyContextT key_context = KeyContextT())
  180. -> InsertResult;
  181. // Insert a key into the map and call the provided callback if necessary to
  182. // produce a new key when no existing value is found.
  183. //
  184. // Example: `m.Insert(key_equivalent, [] { return real_key; });`
  185. //
  186. // The point of this function is when the lookup key is _different_from the
  187. // stored key. However, we don't restrict it in case that blocks generic
  188. // usage.
  189. template <typename LookupKeyT, typename KeyCallbackT>
  190. auto Insert(LookupKeyT lookup_key, KeyCallbackT key_cb,
  191. KeyContextT key_context = KeyContextT()) -> InsertResult
  192. requires(
  193. !std::same_as<KeyT, KeyCallbackT> &&
  194. std::convertible_to<decltype(std::declval<KeyCallbackT>()()), KeyT>);
  195. // Insert a key into the set and call the provided callback to allow in-place
  196. // construction of the key if not already present. The lookup key is passed
  197. // through to the callback so it needn't be captured and can be kept in a
  198. // register argument throughout.
  199. //
  200. // Example:
  201. // ```cpp
  202. // m.Insert("widget", [](MyStringViewType lookup_key, void* key_storage) {
  203. // new (key_storage) MyStringType(lookup_key);
  204. // });
  205. // ```
  206. template <typename LookupKeyT, typename InsertCallbackT>
  207. auto Insert(LookupKeyT lookup_key, InsertCallbackT insert_cb,
  208. KeyContextT key_context = KeyContextT()) -> InsertResult
  209. requires std::invocable<InsertCallbackT, LookupKeyT, void*>;
  210. // Grow the set to a specific allocation size.
  211. //
  212. // This will grow the set's hashtable if necessary for it to have an
  213. // allocation size of `target_alloc_size` which must be a power of two. Note
  214. // that this will not allow that many keys to be inserted, but a smaller
  215. // number based on the maximum load factor. If a specific number of insertions
  216. // need to be achieved without triggering growth, use the `GrowForInsertCount`
  217. // method.
  218. auto GrowToAllocSize(ssize_t target_alloc_size,
  219. KeyContextT key_context = KeyContextT()) -> void;
  220. // Grow the set sufficiently to allow inserting the specified number of keys.
  221. auto GrowForInsertCount(ssize_t count,
  222. KeyContextT key_context = KeyContextT()) -> void;
  223. // Erase a key from the set.
  224. template <typename LookupKeyT>
  225. auto Erase(LookupKeyT lookup_key, KeyContextT key_context = KeyContextT())
  226. -> bool;
  227. // Clear all key/value pairs from the set but leave the underlying hashtable
  228. // allocated and in place.
  229. auto Clear() -> void;
  230. protected:
  231. using ImplT::ImplT;
  232. };
  233. // A data structure for a set of keys.
  234. //
  235. // This set supports small size optimization (or "SSO"). The provided
  236. // `SmallSize` type parameter indicates the size of an embedded buffer for
  237. // storing sets small enough to fit. The default is zero, which always allocates
  238. // a heap buffer on construction. When non-zero, must be a multiple of the
  239. // `MaxGroupSize` which is currently 16. The library will check that the size is
  240. // valid and provide an error at compile time if not. We don't automatically
  241. // select the next multiple or otherwise fit the size to the constraints to make
  242. // it clear in the code how much memory is used by the SSO buffer.
  243. //
  244. // This data structure optimizes heavily for small key types that are cheap to
  245. // move and even copy. Using types with large keys or expensive to copy keys may
  246. // create surprising performance bottlenecks. A `std::string` key should be fine
  247. // with generally small strings, but if some or many strings are large heap
  248. // allocations the performance of hashtable routines may be unacceptably bad and
  249. // another data structure or key design is likely preferable.
  250. //
  251. // Note that `Set`, like `SetBase`, has "shallow" const semantics. Note also
  252. // that this type should typically not appear on API boundaries; either
  253. // `SetBase` or `SetView` should be used instead.
  254. template <typename InputKeyT, ssize_t SmallSize = 0,
  255. typename InputKeyContextT = DefaultKeyContext>
  256. class Set : public RawHashtable::TableImpl<SetBase<InputKeyT, InputKeyContextT>,
  257. SmallSize> {
  258. using BaseT = SetBase<InputKeyT, InputKeyContextT>;
  259. using ImplT = RawHashtable::TableImpl<BaseT, SmallSize>;
  260. public:
  261. using KeyT = typename BaseT::KeyT;
  262. Set() = default;
  263. Set(const Set& arg) = default;
  264. Set(Set&& arg) noexcept = default;
  265. auto operator=(const Set& arg) -> Set& = default;
  266. auto operator=(Set&& arg) noexcept -> Set& = default;
  267. // Reset the entire state of the hashtable to as it was when constructed,
  268. // throwing away any intervening allocations.
  269. auto Reset() -> void;
  270. };
  271. template <typename InputKeyT, typename InputKeyContextT>
  272. template <typename LookupKeyT>
  273. auto SetView<InputKeyT, InputKeyContextT>::Contains(
  274. LookupKeyT lookup_key, KeyContextT key_context) const -> bool {
  275. return this->LookupEntry(lookup_key, key_context) != nullptr;
  276. }
  277. template <typename InputKeyT, typename InputKeyContextT>
  278. template <typename LookupKeyT>
  279. auto SetView<InputKeyT, InputKeyContextT>::Lookup(LookupKeyT lookup_key,
  280. KeyContextT key_context) const
  281. -> LookupResult {
  282. EntryT* entry = this->LookupEntry(lookup_key, key_context);
  283. if (!entry) {
  284. return LookupResult();
  285. }
  286. return LookupResult(entry->key());
  287. }
  288. template <typename InputKeyT, typename InputKeyContextT>
  289. template <typename CallbackT>
  290. auto SetView<InputKeyT, InputKeyContextT>::ForEach(CallbackT callback) const
  291. -> void
  292. requires(std::invocable<CallbackT, KeyT&>)
  293. {
  294. this->ForEachEntry([callback](EntryT& entry) { callback(entry.key()); },
  295. [](auto...) {});
  296. }
  297. template <typename InputKeyT, typename InputKeyContextT>
  298. template <typename LookupKeyT>
  299. auto SetBase<InputKeyT, InputKeyContextT>::Insert(LookupKeyT lookup_key,
  300. KeyContextT key_context)
  301. -> InsertResult {
  302. return Insert(
  303. lookup_key,
  304. [](LookupKeyT lookup_key, void* key_storage) {
  305. new (key_storage) KeyT(std::move(lookup_key));
  306. },
  307. key_context);
  308. }
  309. template <typename InputKeyT, typename InputKeyContextT>
  310. template <typename LookupKeyT, typename KeyCallbackT>
  311. auto SetBase<InputKeyT, InputKeyContextT>::Insert(LookupKeyT lookup_key,
  312. KeyCallbackT key_cb,
  313. KeyContextT key_context)
  314. -> InsertResult
  315. requires(!std::same_as<KeyT, KeyCallbackT> &&
  316. std::convertible_to<decltype(std::declval<KeyCallbackT>()()), KeyT>)
  317. {
  318. return Insert(
  319. lookup_key,
  320. [&key_cb](LookupKeyT /*lookup_key*/, void* key_storage) {
  321. new (key_storage) KeyT(key_cb());
  322. },
  323. key_context);
  324. }
  325. template <typename InputKeyT, typename InputKeyContextT>
  326. template <typename LookupKeyT, typename InsertCallbackT>
  327. auto SetBase<InputKeyT, InputKeyContextT>::Insert(LookupKeyT lookup_key,
  328. InsertCallbackT insert_cb,
  329. KeyContextT key_context)
  330. -> InsertResult
  331. requires std::invocable<InsertCallbackT, LookupKeyT, void*>
  332. {
  333. auto [entry, inserted] = this->InsertImpl(lookup_key, key_context);
  334. CARBON_DCHECK(entry, "Should always result in a valid index.");
  335. if (LLVM_LIKELY(!inserted)) {
  336. return InsertResult(false, entry->key());
  337. }
  338. insert_cb(lookup_key, static_cast<void*>(&entry->key_storage));
  339. return InsertResult(true, entry->key());
  340. }
  341. template <typename InputKeyT, typename InputKeyContextT>
  342. auto SetBase<InputKeyT, InputKeyContextT>::GrowToAllocSize(
  343. ssize_t target_alloc_size, KeyContextT key_context) -> void {
  344. this->GrowToAllocSizeImpl(target_alloc_size, key_context);
  345. }
  346. template <typename InputKeyT, typename InputKeyContextT>
  347. auto SetBase<InputKeyT, InputKeyContextT>::GrowForInsertCount(
  348. ssize_t count, KeyContextT key_context) -> void {
  349. this->GrowForInsertCountImpl(count, key_context);
  350. }
  351. template <typename InputKeyT, typename InputKeyContextT>
  352. template <typename LookupKeyT>
  353. auto SetBase<InputKeyT, InputKeyContextT>::Erase(LookupKeyT lookup_key,
  354. KeyContextT key_context)
  355. -> bool {
  356. return this->EraseImpl(lookup_key, key_context);
  357. }
  358. template <typename InputKeyT, typename InputKeyContextT>
  359. auto SetBase<InputKeyT, InputKeyContextT>::Clear() -> void {
  360. this->ClearImpl();
  361. }
  362. template <typename InputKeyT, ssize_t SmallSize, typename InputKeyContextT>
  363. auto Set<InputKeyT, SmallSize, InputKeyContextT>::Reset() -> void {
  364. this->ResetImpl();
  365. }
  366. } // namespace Carbon
  367. #endif // CARBON_COMMON_SET_H_