// common/hashing.h
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_COMMON_HASHING_H_
  5. #define CARBON_COMMON_HASHING_H_
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>

#include "common/check.h"
#include "common/ostream.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
  17. #ifdef __ARM_ACLE
  18. #include <arm_acle.h>
  19. #endif
  20. namespace Carbon {
  21. // A 64-bit hash code produced by `Carbon::HashValue`.
  22. //
  23. // This provides methods for extracting high-quality bits from the hash code
  24. // quickly.
  25. //
  26. // This class can also be a hashing input when recursively hashing more complex
  27. // data structures.
class HashCode : public Printable<HashCode> {
 public:
  // Default constructs to a zero hash code.
  HashCode() = default;

  // Wraps a raw 64-bit value as a `HashCode`.
  constexpr explicit HashCode(uint64_t value) : value_(value) {}

  friend constexpr auto operator==(HashCode lhs, HashCode rhs) -> bool {
    return lhs.value_ == rhs.value_;
  }
  friend constexpr auto operator!=(HashCode lhs, HashCode rhs) -> bool {
    return lhs.value_ != rhs.value_;
  }

  // Extracts an index from the hash code that is in the range [0, size). The
  // size and returned index are `ssize_t` for performance reasons. This is
  // useful when using the hash code to index a hash table. It prioritizes
  // computing the index from the bits in the hash code with the highest
  // entropy. The `size` must be a power of two (checked by the
  // implementation's debug assertion).
  constexpr auto ExtractIndex(ssize_t size) -> ssize_t;

  // Extracts an index and a fixed `N`-bit tag from the hash code.
  //
  // This will both minimize overlap between the tag and the index as well as
  // maximizing the entropy of the bits that contribute to each.
  //
  // The index will be in the range [0, `size`). The `size` must be a power of
  // two, and `N` must be in the range [1, 32].
  template <int N>
  constexpr auto ExtractIndexAndTag(ssize_t size)
      -> std::pair<ssize_t, uint32_t>;

  // Extract the full 64-bit hash code as an integer.
  //
  // The methods above should be preferred rather than directly manipulating
  // this integer. This is provided primarily to enable Merkle-tree hashing or
  // other recursive hashing where that is needed or more efficient.
  explicit operator uint64_t() const { return value_; }

  // Prints the hash code as a 16-digit, zero-padded hexadecimal value.
  auto Print(llvm::raw_ostream& out) const -> void {
    out << llvm::formatv("{0:x16}", value_);
  }

 private:
  // The raw 64-bit hash value; zero until assigned from a constructed code.
  uint64_t value_ = 0;
};
  66. // Computes a hash code for the provided value, incorporating the provided seed.
  67. //
  68. // The seed doesn't need to be of any particular high quality, but a zero seed
  69. // has bad effects in several places. Prefer the unseeded routine rather than
  70. // providing a zero here.
  71. //
// This is **not** a cryptographically secure or stable hash -- it is only
// designed
  73. // for use with in-memory hash table style data structures. Being fast and
  74. // effective for that use case is the guiding principle of its design.
  75. //
  76. // There is no guarantee that the values produced are stable from execution to
  77. // execution. For speed and quality reasons, the implementation does not
  78. // introduce any variance to defend against accidental dependencies. As a
  79. // consequence, it is strongly encouraged to use a seed that varies from
  80. // execution to execution to avoid depending on specific values produced.
  81. //
  82. // The algorithm used is most heavily based on [Abseil's hashing algorithm][1],
  83. // with some additional ideas and inspiration from the fallback hashing
  84. // algorithm in [Rust's AHash][2] and the [FxHash][3] function. However, there
  85. // are also *significant* changes introduced here.
  86. //
  87. // [1]: https://github.com/abseil/abseil-cpp/tree/master/absl/hash/internal
  88. // [2]: https://github.com/tkaitchuck/aHash/wiki/AHash-fallback-algorithm
  89. // [3]: https://docs.rs/fxhash/latest/fxhash/
  90. //
  91. // This hash algorithm does *not* defend against hash flooding. While it can be
  92. // viewed as "keyed" on the seed, it is expected to be possible to craft inputs
  93. // for some data types that cancel out the seed used and manufacture endlessly
  94. // colliding sets of keys. In general, this function works to be *fast* for hash
  95. // tables. If you need to defend against hash flooding, either directly use a
  96. // data structure with strong worst-case guarantees, or a hash table which
  97. // detects catastrophic collisions and falls back to such a data structure.
  98. //
  99. // This hash function is heavily optimized for *latency* over *quality*. Modern
  100. // hash tables designs can efficiently handle reasonable collision rates,
  101. // including using extra bits from the hash to avoid all efficiency coming from
  102. // the same low bits. Because of this, low-latency is significantly more
  103. // important for performance than high-quality, and this is heavily leveraged.
  104. // The result is that the hash codes produced *do* have significant avalanche
  105. // problems for small keys. The upside is that the latency for hashing integers,
  106. // pointers, and small byte strings (up to 32-bytes) is exceptionally low, and
  107. // essentially a small constant time instruction sequence.
  108. //
  109. // No exotic instruction set extensions are required, and the state used is
  110. // small. It does rely on being able to get the low- and high-64-bit results of
  111. // a 64-bit multiply efficiently.
  112. //
  113. // The function supports many typical data types such as primitives, string-ish
  114. // views, and types composing primitives transparently like pairs, tuples, and
  115. // array-ish views. It is also extensible to support user-defined types.
  116. //
  117. // The builtin support for string-like types include:
  118. // - `std::string_view`
  119. // - `std::string`
  120. // - `llvm::StringRef`
  121. // - `llvm::SmallString`
  122. //
  123. // This function supports heterogeneous lookup between all of the string-like
  124. // types. It also supports heterogeneous lookup between pointer types regardless
  125. // of pointee type and `nullptr`.
  126. //
// However, this is the only heterogeneous lookup supported, including for the
// built-in, standard, and LLVM types. Notably, each different size and
  129. // signedness integer type may hash differently for efficiency reasons. Hash
  130. // tables should pick a single integer type in which to manage keys and do
  131. // lookups.
  132. //
  133. // To add support for your type, you need to implement a customization point --
  134. // a free function that can be found by ADL for your type -- called
  135. // `CarbonHashValue` with the following signature:
  136. //
  137. // ```cpp
  138. // auto CarbonHashValue(const YourType& value, uint64_t seed) -> HashCode;
  139. // ```
  140. //
  141. // The extension point needs to ensure that values that compare equal (including
  142. // any comparisons with different types that might be used with a hash table of
  143. // `YourType` keys) produce the same `HashCode` values.
  144. //
  145. // `HashCode` values should typically be produced using the `Hasher` helper type
  146. // below. See its documentation for more details about implementing these
  147. // customization points and how best to incorporate the value's state into a
  148. // `HashCode`.
  149. //
  150. // For two input values that are almost but not quite equal, the extension
  151. // point should maximize the probability of each bit of their resulting
  152. // `HashCode`s differing. More formally, `HashCode`s should exhibit an
  153. // [avalanche effect][4]. However, while this is desirable, it should be
  154. // **secondary** to low latency. The intended use case of these functions is not
  155. // cryptography but in-memory hashtables where the latency and overhead of
  156. // computing the `HashCode` is *significantly* more important than achieving a
  157. // particularly high quality. The goal is to have "just enough" avalanche
  158. // effect, but there is not a fixed criteria for how much is enough. That should
  159. // be determined through practical experimentation with a hashtable and
  160. // distribution of keys.
  161. //
  162. // [4]: https://en.wikipedia.org/wiki/Avalanche_effect
  163. template <typename T>
  164. inline auto HashValue(const T& value, uint64_t seed) -> HashCode;
  165. // The same as the seeded version of `HashValue` but without callers needing to
  166. // provide a seed.
  167. //
  168. // Generally prefer the seeded version, but this is available if there is no
  169. // reasonable seed. In particular, this will behave better than using a seed of
  170. // `0`. One important use case is for recursive hashing of sub-objects where
  171. // appropriate or needed.
  172. template <typename T>
  173. inline auto HashValue(const T& value) -> HashCode;
  174. // Object and APIs that eventually produce a hash code.
  175. //
  176. // This type is primarily used by types to implement a customization point
  177. // `CarbonHashValue` that will in turn be used by the `HashValue` function. See
  178. // the `HashValue` function for details of that extension point.
  179. //
  180. // The methods on this type can be used to incorporate data from your
  181. // user-defined type into its internal state which can be converted to a
  182. // `HashCode` at any time. These methods will only produce the same `HashCode`
  183. // if they are called in the exact same order with the same arguments -- there
  184. // are no guaranteed equivalences between calling different methods.
  185. //
  186. // Example usage:
  187. // ```cpp
  188. // auto CarbonHashValue(const MyType& value, uint64_t seed) -> HashCode {
  189. // Hasher hasher(seed);
  190. // hasher.HashTwo(value.x, value.y);
  191. // return static_cast<HashCode>(hasher);
  192. // }
  193. // ```
  194. //
  195. // This type's API also reflects the reality that high-performance hash tables
  196. // are used with keys that are generally small and cheap to hash.
  197. //
  198. // To ensure this type's code is optimized effectively, it should typically be
  199. // used as a local variable and not passed across function boundaries
  200. // unnecessarily.
  201. //
  202. // The type also provides a number of static helper functions and static data
  203. // members that may be used by authors of `CarbonHashValue` implementations to
  204. // efficiently compute the inputs to the core `Hasher` methods, or even to
  205. // manually do some amounts of hashing in performance-tuned ways outside of the
  206. // methods provided.
class Hasher {
 public:
  Hasher() = default;

  // Seeds the hasher's initial state with `seed`.
  explicit Hasher(uint64_t seed) : buffer(seed) {}

  Hasher(Hasher&& arg) = default;
  // Copying is disallowed so hashing state cannot be accidentally forked; the
  // type is move-only.
  Hasher(const Hasher& arg) = delete;
  auto operator=(Hasher&& rhs) -> Hasher& = default;

  // Extracts the current state as a `HashCode` for use.
  explicit operator HashCode() const { return HashCode(buffer); }

  // Incorporates an object into the hasher's state by hashing its object
  // representation. Requires `value`'s type to have a unique object
  // representation. This is primarily useful for builtin and primitive types.
  //
  // This can be directly used for simple users combining some aggregation of
  // objects. However, when possible, prefer the variadic version below for
  // aggregating several primitive types into a hash.
  template <typename T, typename = std::enable_if_t<
                            std::has_unique_object_representations_v<T>>>
  auto Hash(const T& value) -> void;

  // Incorporates a variable number of objects into the `hasher`s state in a
  // similar manner to applying the above function to each one in series. It has
  // the same requirements as the above function for each `value`. And it
  // returns the updated `hasher`.
  //
  // There is no guaranteed correspondence between the behavior of a single call
  // with multiple parameters and multiple calls. This routine is also optimized
  // for handling relatively small numbers of objects. For hashing large
  // aggregations, consider some Merkle-tree decomposition or arranging for a
  // byte buffer that can be hashed as a single buffer. However, hashing large
  // aggregations of data in this way rarely results in effectively
  // high-performance hash table data structures and so should generally be
  // avoided.
  template <typename... Ts,
            typename = std::enable_if_t<
                (... && std::has_unique_object_representations_v<Ts>)>>
  auto Hash(const Ts&... value) -> void;

  // Simpler and more primitive functions to incorporate state represented in
  // `uint64_t` values into the hasher's state.
  //
  // These may be slightly less efficient than the `Hash` method above for a
  // typical application code `uint64_t`, but are designed to work well even
  // when relevant data has been packed into the `uint64_t` parameters densely.
  auto HashDense(uint64_t data) -> void;
  auto HashDense(uint64_t data0, uint64_t data1) -> void;

  // A heavily optimized routine for incorporating a dynamically sized sequence
  // of bytes into the hasher's state.
  //
  // This routine has carefully structured inline code paths for short byte
  // sequences and a reasonably high bandwidth code path for longer sequences.
  // The size of the byte sequence is always incorporated into the hasher's
  // state along with the contents.
  auto HashSizedBytes(llvm::ArrayRef<std::byte> bytes) -> void;

  // An out-of-line, throughput-optimized routine for incorporating a
  // dynamically sized sequence when the sequence size is guaranteed to be >32.
  // The size is always incorporated into the state.
  auto HashSizedBytesLarge(llvm::ArrayRef<std::byte> bytes) -> void;

  // Utility functions to read data of various sizes efficiently into a
  // 64-bit value. These pointers need-not be aligned, and can alias other
  // objects. The representation of the read data in the `uint64_t` returned is
  // not stable or guaranteed.
  static auto Read1(const std::byte* data) -> uint64_t;
  static auto Read2(const std::byte* data) -> uint64_t;
  static auto Read4(const std::byte* data) -> uint64_t;
  static auto Read8(const std::byte* data) -> uint64_t;

  // Similar to the `ReadN` functions, but supports reading a range of different
  // bytes provided by the size *without branching on the size*. The lack of
  // branches is often key, and the code in these routines works to be efficient
  // in extracting a *dynamic* size of bytes into the returned `uint64_t`. There
  // may be overlap between different routines, because these routines are based
  // on different implementation techniques that do have some overlap in the
  // range of sizes they can support. Which routine is the most efficient for a
  // size in the overlap isn't trivial, and so these primitives are provided
  // as-is and should be selected based on the localized generated code and
  // benchmarked performance.
  static auto Read1To3(const std::byte* data, ssize_t size) -> uint64_t;
  static auto Read4To8(const std::byte* data, ssize_t size) -> uint64_t;
  static auto Read8To16(const std::byte* data, ssize_t size)
      -> std::pair<uint64_t, uint64_t>;

  // Reads the underlying object representation of a type into a 64-bit integer
  // efficiently. Only supports types with unique object representation and at
  // most 8-bytes large. This is typically used to read primitive types.
  template <typename T,
            typename = std::enable_if_t<
                std::has_unique_object_representations_v<T> && sizeof(T) <= 8>>
  static auto ReadSmall(const T& value) -> uint64_t;

  // The core of the hash algorithm is this mix function. The specific
  // operations are not guaranteed to be stable but are described here for
  // hashing authors to understand what to expect.
  //
  // Currently, this uses the same "mix" operation as in Abseil, AHash, and
  // several other hashing algorithms. It takes two 64-bit integers, and
  // multiplies them, capturing both the high 64-bit result and the low 64-bit
  // result, and then XOR-ing those two halves together.
  //
  // A consequence of this operation is that a zero on either side will fail to
  // incorporate any bits from the other side. Often, this is an acceptable rate
  // of collision in practice. But it is worth being aware of and working to
  // avoid common paths encountering this. For example, naively used this might
  // cause different length all-zero byte strings to hash the same, essentially
  // losing the length in the composition of the hash for a likely important
  // case of byte sequence.
  //
  // Another consequence of the particular implementation is that it is useful
  // to have a reasonable distribution of bits throughout both sides of the
  // multiplication. However, it is not *necessary* as we do capture the
  // complete 128-bit result. Where reasonable, the caller should XOR random
  // data into operands before calling `Mix` to try and increase the
  // distribution of bits feeding the multiply.
  static auto Mix(uint64_t lhs, uint64_t rhs) -> uint64_t;

  // An alternative to `Mix` that is significantly weaker but also lower
  // latency. It should not be used when the input `uint64_t` is densely packed
  // with data, but is a good option for hashing a single integer or pointer
  // where the full 64-bits are sparsely populated and especially the high bits
  // are often invariant between interestingly different values.
  //
  // This uses just the low 64-bit result of a multiply. It ensures the operand
  // is good at diffusing bits, but inherently the high bits of the input will
  // be (significantly) less often represented in the output. It also does some
  // reversal to ensure the *low* bits of the result are the most useful ones.
  static auto WeakMix(uint64_t value) -> uint64_t;

  // We have a 64-byte random data pool designed to fit on a single cache line.
  // This routine allows sampling it at byte indices, which allows getting 64 -
  // 8 different random 64-bit results. The offset must be in the range [0, 56).
  static auto SampleRandomData(ssize_t offset) -> uint64_t {
    // The full 8-byte read starting at `offset` must stay inside the pool.
    CARBON_DCHECK(offset + sizeof(uint64_t) < sizeof(StaticRandomData));
    uint64_t data;
    // `memcpy` makes the potentially unaligned read well-defined.
    memcpy(&data,
           reinterpret_cast<const unsigned char*>(&StaticRandomData) + offset,
           sizeof(data));
    return data;
  }

  // Random data taken from the hexadecimal digits of Pi's fractional component,
  // written in lexical order for convenience of reading. The resulting
  // byte-stream will be different due to little-endian integers. These can be
  // used directly for convenience rather than calling `SampleRandomData`, but
  // be aware that this is the underlying pool. The goal is to reuse the same
  // single cache-line of constant data.
  //
  // The initializers here can be generated with the following shell script,
  // which will generate 8 64-bit values and one more digit. The `bc` command's
  // decimal based scaling means that without getting at least some extra hex
  // digits rendered there will be rounding that we don't want so the script
  // below goes on to produce one more hex digit ensuring the 8 initializers
  // aren't rounded in any way. Using a higher scale won't cause the 8
  // initializers here to change further.
  //
  // ```sh
  // echo 'obase=16; scale=155; 4*a(1)' | env BC_LINE_LENGTH=500 bc -l \
  //  | cut -c 3- | tr '[:upper:]' '[:lower:]' \
  //  | sed -e "s/.\{4\}/&'/g" \
  //  | sed -e "s/\(.\{4\}'.\{4\}'.\{4\}'.\{4\}\)'/0x\1,\n/g"
  // ```
  static inline constexpr std::array<uint64_t, 8> StaticRandomData = {
      0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0,
      0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377, 0xbe54'66cf'34e9'0c6c,
      0xc0ac'29b7'c97c'50dd, 0x3f84'd5b5'b547'0917,
  };

  // The multiplicative hash constant from Knuth, derived from 2^64 / Phi. For
  // details on its selection, see:
  // https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
  // https://book.huihoo.com/data-structures-and-algorithms-with-object-oriented-design-patterns-in-c++/html/page214.html
  static constexpr uint64_t MulConstant = 0x9e37'79b9'7f4a'7c15U;

 private:
  // The single 64-bit accumulator of hashing state: seeded by the constructor
  // and read out via the `HashCode` conversion operator.
  uint64_t buffer;
};
  372. // A dedicated namespace for `CarbonHashValue` overloads that are not found by
  373. // ADL with their associated types. For example, primitive type overloads or
  374. // overloads for types in LLVM's libraries.
  375. namespace HashDispatch {
  376. inline auto CarbonHashValue(llvm::ArrayRef<std::byte> bytes, uint64_t seed)
  377. -> HashCode {
  378. Hasher hasher(seed);
  379. hasher.HashSizedBytes(bytes);
  380. return static_cast<HashCode>(hasher);
  381. }
  382. // Hashing implementation for `llvm::StringRef`. We forward all the other
  383. // string-like types that support heterogeneous lookup to this one.
  384. inline auto CarbonHashValue(llvm::StringRef value, uint64_t seed) -> HashCode {
  385. return CarbonHashValue(
  386. llvm::ArrayRef(reinterpret_cast<const std::byte*>(value.data()),
  387. value.size()),
  388. seed);
  389. }
  390. inline auto CarbonHashValue(std::string_view value, uint64_t seed) -> HashCode {
  391. return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
  392. }
  393. inline auto CarbonHashValue(const std::string& value, uint64_t seed)
  394. -> HashCode {
  395. return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
  396. }
  397. template <unsigned Length>
  398. inline auto CarbonHashValue(const llvm::SmallString<Length>& value,
  399. uint64_t seed) -> HashCode {
  400. return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
  401. }
// C++ guarantees this is true for the unsigned variants, but we require it for
// signed variants and pointers.
//
// These checks back the `CarbonHashValue` overloads below, which hash a
// value's object representation and are therefore only sound when equal
// values have identical bytes.
static_assert(std::has_unique_object_representations_v<int8_t>);
static_assert(std::has_unique_object_representations_v<int16_t>);
static_assert(std::has_unique_object_representations_v<int32_t>);
static_assert(std::has_unique_object_representations_v<int64_t>);
static_assert(std::has_unique_object_representations_v<void*>);
  409. // C++ uses `std::nullptr_t` but unfortunately doesn't make it have a unique
  410. // object representation. To address that, we need a function that converts
  411. // `nullptr` back into a `void*` that will have a unique object representation.
  412. // And this needs to be done by-value as we need to build a temporary object to
  413. // return, which requires a separate overload rather than just using a type
  414. // function that could be used in parallel in the predicate below. Instead, we
  415. // build the predicate independently of the mapping overload, but together they
  416. // should produce the correct result.
  417. template <typename T>
  418. inline auto MapNullPtrToVoidPtr(const T& value) -> const T& {
  419. // This overload should never be selected for `std::nullptr_t`, so
  420. // static_assert to get some better compiler error messages.
  421. static_assert(!std::is_same_v<T, std::nullptr_t>);
  422. return value;
  423. }
  424. inline auto MapNullPtrToVoidPtr(std::nullptr_t /*value*/) -> const void* {
  425. return nullptr;
  426. }
// Predicate to be used in conjunction with a `nullptr` mapping routine like the
// above.
//
// True when `T` is `std::nullptr_t` or has a unique object representation --
// the two categories of types the `CarbonHashValue` overloads below accept.
template <typename T>
constexpr bool NullPtrOrHasUniqueObjectRepresentations =
    std::is_same_v<T, std::nullptr_t> ||
    std::has_unique_object_representations_v<T>;
  433. template <typename T, typename = std::enable_if_t<
  434. NullPtrOrHasUniqueObjectRepresentations<T>>>
  435. inline auto CarbonHashValue(const T& value, uint64_t seed) -> HashCode {
  436. Hasher hasher(seed);
  437. hasher.Hash(MapNullPtrToVoidPtr(value));
  438. return static_cast<HashCode>(hasher);
  439. }
  440. template <typename... Ts,
  441. typename = std::enable_if_t<
  442. (... && NullPtrOrHasUniqueObjectRepresentations<Ts>)>>
  443. inline auto CarbonHashValue(const std::tuple<Ts...>& value, uint64_t seed)
  444. -> HashCode {
  445. Hasher hasher(seed);
  446. std::apply(
  447. [&](const auto&... args) { hasher.Hash(MapNullPtrToVoidPtr(args)...); },
  448. value);
  449. return static_cast<HashCode>(hasher);
  450. }
  451. template <typename T, typename U,
  452. typename = std::enable_if_t<
  453. NullPtrOrHasUniqueObjectRepresentations<T> &&
  454. NullPtrOrHasUniqueObjectRepresentations<U> &&
  455. sizeof(T) <= sizeof(uint64_t) && sizeof(U) <= sizeof(uint64_t)>>
  456. inline auto CarbonHashValue(const std::pair<T, U>& value, uint64_t seed)
  457. -> HashCode {
  458. return CarbonHashValue(std::tuple(value.first, value.second), seed);
  459. }
  460. template <typename T, typename = std::enable_if_t<
  461. std::has_unique_object_representations_v<T>>>
  462. inline auto CarbonHashValue(llvm::ArrayRef<T> objs, uint64_t seed) -> HashCode {
  463. return CarbonHashValue(
  464. llvm::ArrayRef(reinterpret_cast<const std::byte*>(objs.data()),
  465. objs.size() * sizeof(T)),
  466. seed);
  467. }
  468. template <typename T>
  469. inline auto DispatchImpl(const T& value, uint64_t seed) -> HashCode {
  470. // This unqualified call will find both the overloads in this namespace and
  471. // ADL-found functions in an associated namespace of `T`.
  472. return CarbonHashValue(value, seed);
  473. }
  474. } // namespace HashDispatch
  475. template <typename T>
  476. inline auto HashValue(const T& value, uint64_t seed) -> HashCode {
  477. return HashDispatch::DispatchImpl(value, seed);
  478. }
  479. template <typename T>
  480. inline auto HashValue(const T& value) -> HashCode {
  481. // When a seed isn't provided, use the last 64-bit chunk of random data. Other
  482. // chunks (especially the first) are more often XOR-ed with the seed and risk
  483. // cancelling each other out and feeding a zero to a `Mix` call in a way that
  484. // sharply increasing collisions.
  485. return HashValue(value, Hasher::StaticRandomData[7]);
  486. }
  487. inline constexpr auto HashCode::ExtractIndex(ssize_t size) -> ssize_t {
  488. CARBON_DCHECK(llvm::isPowerOf2_64(size));
  489. return value_ & (size - 1);
  490. }
  491. template <int N>
  492. inline constexpr auto HashCode::ExtractIndexAndTag(ssize_t size)
  493. -> std::pair<ssize_t, uint32_t> {
  494. static_assert(N >= 1);
  495. static_assert(N <= 32);
  496. CARBON_DCHECK(llvm::isPowerOf2_64(size));
  497. CARBON_DCHECK(1LL << (64 - N) >= size) << "Not enough bits for size and tag!";
  498. return {static_cast<ssize_t>((value_ >> N) & (size - 1)),
  499. static_cast<uint32_t>(value_ & ((1U << (N + 1)) - 1))};
  500. }
// Building with `-DCARBON_MCA_MARKERS` will enable `llvm-mca` annotations in
// the source code. These can interfere with optimization, but allows analyzing
// the generated `.s` file with the `llvm-mca` tool. Documentation for these
// markers is here:
// https://llvm.org/docs/CommandGuide/llvm-mca.html#using-markers-to-analyze-specific-code-blocks
#if CARBON_MCA_MARKERS
// The markers are emitted as inline-asm comments; the "memory" clobber keeps
// the compiler from reordering memory operations across the marked region.
#define CARBON_MCA_BEGIN(NAME) \
  __asm volatile("# LLVM-MCA-BEGIN " NAME "" ::: "memory");
#define CARBON_MCA_END(NAME) \
  __asm volatile("# LLVM-MCA-END " NAME "" ::: "memory");
#else
// Expand to nothing when markers are disabled so call sites need no guards.
#define CARBON_MCA_BEGIN(NAME)
#define CARBON_MCA_END(NAME)
#endif
  515. inline auto Hasher::Read1(const std::byte* data) -> uint64_t {
  516. uint8_t result;
  517. std::memcpy(&result, data, sizeof(result));
  518. return result;
  519. }
  520. inline auto Hasher::Read2(const std::byte* data) -> uint64_t {
  521. uint16_t result;
  522. std::memcpy(&result, data, sizeof(result));
  523. return result;
  524. }
  525. inline auto Hasher::Read4(const std::byte* data) -> uint64_t {
  526. uint32_t result;
  527. std::memcpy(&result, data, sizeof(result));
  528. return result;
  529. }
  530. inline auto Hasher::Read8(const std::byte* data) -> uint64_t {
  531. uint64_t result;
  532. std::memcpy(&result, data, sizeof(result));
  533. return result;
  534. }
  535. inline auto Hasher::Read1To3(const std::byte* data, ssize_t size) -> uint64_t {
  536. // Use carefully crafted indexing to avoid branches on the exact size while
  537. // reading.
  538. uint64_t byte0 = static_cast<uint8_t>(data[0]);
  539. uint64_t byte1 = static_cast<uint8_t>(data[size - 1]);
  540. uint64_t byte2 = static_cast<uint8_t>(data[size >> 1]);
  541. return byte0 | (byte1 << 16) | (byte2 << 8);
  542. }
  543. inline auto Hasher::Read4To8(const std::byte* data, ssize_t size) -> uint64_t {
  544. uint32_t low;
  545. std::memcpy(&low, data, sizeof(low));
  546. uint32_t high;
  547. std::memcpy(&high, data + size - sizeof(high), sizeof(high));
  548. return low | (static_cast<uint64_t>(high) << 32);
  549. }
  550. inline auto Hasher::Read8To16(const std::byte* data, ssize_t size)
  551. -> std::pair<uint64_t, uint64_t> {
  552. uint64_t low;
  553. std::memcpy(&low, data, sizeof(low));
  554. uint64_t high;
  555. std::memcpy(&high, data + size - sizeof(high), sizeof(high));
  556. return {low, high};
  557. }
  558. inline auto Hasher::Mix(uint64_t lhs, uint64_t rhs) -> uint64_t {
  559. // Use the C23 extended integer support that Clang provides as a general
  560. // language extension.
  561. using U128 = unsigned _BitInt(128);
  562. U128 result = static_cast<U128>(lhs) * static_cast<U128>(rhs);
  563. return static_cast<uint64_t>(result) ^ static_cast<uint64_t>(result >> 64);
  564. }
  565. inline auto Hasher::WeakMix(uint64_t value) -> uint64_t {
  566. value *= MulConstant;
  567. #ifdef __ARM_ACLE
  568. // Arm has a fast bit-reversal that gives us the optimal distribution.
  569. value = __rbitll(value);
  570. #else
  571. // Otherwise, assume an optimized BSWAP such as x86's. That's close enough.
  572. value = __builtin_bswap64(value);
  573. #endif
  574. return value;
  575. }
  576. inline auto Hasher::HashDense(uint64_t data) -> void {
  577. // When hashing exactly one 64-bit entity use the Phi-derived constant as this
  578. // is just multiplicative hashing. The initial buffer is mixed on input to
  579. // pipeline with materializing the constant.
  580. buffer = Mix(data ^ buffer, MulConstant);
  581. }
  582. inline auto Hasher::HashDense(uint64_t data0, uint64_t data1) -> void {
  583. // When hashing two chunks of data at the same time, we XOR it with random
  584. // data to avoid common inputs from having especially bad multiplicative
  585. // effects. We also XOR in the starting buffer as seed or to chain. Note that
  586. // we don't use *consecutive* random data 64-bit values to avoid a common
  587. // compiler "optimization" of loading both 64-bit chunks into a 128-bit vector
  588. // and doing the XOR in the vector unit. The latency of extracting the data
  589. // afterward eclipses any benefit. Callers will routinely have two consecutive
  590. // data values here, but using non-consecutive keys avoids any vectorization
  591. // being tempting.
  592. //
  593. // XOR-ing both the incoming state and a random word over the second data is
  594. // done to pipeline with materializing the constants and is observed to have
  595. // better performance than XOR-ing after the mix.
  596. //
  597. // This roughly matches the mix pattern used in the larger mixing routines
  598. // from Abseil, which is a more minimal form than used in other algorithms
  599. // such as AHash and seems adequate for latency-optimized use cases.
  600. buffer =
  601. Mix(data0 ^ StaticRandomData[1], data1 ^ StaticRandomData[3] ^ buffer);
  602. }
  603. template <typename T, typename /*enable_if*/>
  604. inline auto Hasher::ReadSmall(const T& value) -> uint64_t {
  605. const auto* storage = reinterpret_cast<const std::byte*>(&value);
  606. if constexpr (sizeof(T) == 1) {
  607. return Read1(storage);
  608. } else if constexpr (sizeof(T) == 2) {
  609. return Read2(storage);
  610. } else if constexpr (sizeof(T) == 3) {
  611. return Read2(storage) | (Read1(&storage[2]) << 16);
  612. } else if constexpr (sizeof(T) == 4) {
  613. return Read4(storage);
  614. } else if constexpr (sizeof(T) == 5) {
  615. return Read4(storage) | (Read1(&storage[4]) << 32);
  616. } else if constexpr (sizeof(T) == 6 || sizeof(T) == 7) {
  617. // Use overlapping 4-byte reads for 6 and 7 bytes.
  618. return Read4(storage) | (Read4(&storage[sizeof(T) - 4]) << 32);
  619. } else if constexpr (sizeof(T) == 8) {
  620. return Read8(storage);
  621. } else {
  622. static_assert(sizeof(T) <= 8);
  623. }
  624. }
  625. template <typename T, typename /*enable_if*/>
  626. inline auto Hasher::Hash(const T& value) -> void {
  627. if constexpr (sizeof(T) <= 8) {
  628. // For types size 8-bytes and smaller directly being hashed (as opposed to
  629. // 8-bytes potentially bit-packed with data), we rarely expect the incoming
  630. // data to fully and densely populate all 8 bytes. For these cases we have a
  631. // `WeakMix` routine that is lower latency but lower quality.
  632. CARBON_MCA_BEGIN("fixed-8b");
  633. buffer = WeakMix(ReadSmall(value));
  634. CARBON_MCA_END("fixed-8b");
  635. return;
  636. }
  637. const auto* data_ptr = reinterpret_cast<const std::byte*>(&value);
  638. if constexpr (8 < sizeof(T) && sizeof(T) <= 16) {
  639. CARBON_MCA_BEGIN("fixed-16b");
  640. auto values = Read8To16(data_ptr, sizeof(T));
  641. HashDense(values.first, values.second);
  642. CARBON_MCA_END("fixed-16b");
  643. return;
  644. }
  645. if constexpr (16 < sizeof(T) && sizeof(T) <= 32) {
  646. CARBON_MCA_BEGIN("fixed-32b");
  647. // Essentially the same technique used for dynamically sized byte sequences
  648. // of this size, but we start with a fixed XOR of random data.
  649. buffer ^= StaticRandomData[0];
  650. uint64_t m0 = Mix(Read8(data_ptr) ^ StaticRandomData[1],
  651. Read8(data_ptr + 8) ^ buffer);
  652. const std::byte* tail_16b_ptr = data_ptr + (sizeof(T) - 16);
  653. uint64_t m1 = Mix(Read8(tail_16b_ptr) ^ StaticRandomData[3],
  654. Read8(tail_16b_ptr + 8) ^ buffer);
  655. buffer = m0 ^ m1;
  656. CARBON_MCA_END("fixed-32b");
  657. return;
  658. }
  659. // Hashing the size isn't relevant here, but is harmless, so fall back to a
  660. // common code path.
  661. HashSizedBytesLarge(llvm::ArrayRef<std::byte>(data_ptr, sizeof(T)));
  662. }
  663. template <typename... Ts, typename /*enable_if*/>
  664. inline auto Hasher::Hash(const Ts&... value) -> void {
  665. if constexpr (sizeof...(Ts) == 0) {
  666. buffer ^= StaticRandomData[0];
  667. return;
  668. }
  669. if constexpr (sizeof...(Ts) == 1) {
  670. Hash(value...);
  671. return;
  672. }
  673. if constexpr ((... && (sizeof(Ts) <= 8))) {
  674. if constexpr (sizeof...(Ts) == 2) {
  675. HashDense(ReadSmall(value)...);
  676. return;
  677. }
  678. // More than two, but all small -- read each one into a contiguous buffer of
  679. // data. This may be a bit memory wasteful by padding everything out to
  680. // 8-byte chunks, but for that regularity the hashing is likely faster.
  681. const uint64_t data[] = {ReadSmall(value)...};
  682. Hash(data);
  683. return;
  684. }
  685. // For larger objects, hash each one down to a hash code and then hash those
  686. // as a buffer.
  687. const uint64_t data[] = {static_cast<uint64_t>(HashValue(value))...};
  688. Hash(data);
  689. }
  690. inline auto Hasher::HashSizedBytes(llvm::ArrayRef<std::byte> bytes) -> void {
  691. const std::byte* data_ptr = bytes.data();
  692. const ssize_t size = bytes.size();
  693. // First handle short sequences under 8 bytes. We distribute the branches a
  694. // bit for short strings.
  695. if (size <= 8) {
  696. if (size >= 4) {
  697. CARBON_MCA_BEGIN("dynamic-8b");
  698. uint64_t data = Read4To8(data_ptr, size);
  699. // We optimize for latency on short strings by hashing both the data and
  700. // size in a single multiply here, using the small nature of size to
  701. // sample a specific sequence of bytes with well distributed bits into one
  702. // side of the multiply. This results in a *statistically* weak hash
  703. // function, but one with very low latency.
  704. //
  705. // Note that we don't drop to the `WeakMix` routine here because we want
  706. // to use sampled random data to encode the size, which may not be as
  707. // effective without the full 128-bit folded result.
  708. buffer = Mix(data ^ buffer, SampleRandomData(size));
  709. CARBON_MCA_END("dynamic-8b");
  710. return;
  711. }
  712. // When we only have 0-3 bytes of string, we can avoid the cost of `Mix`.
  713. // Instead, for empty strings we can just XOR some of our data against the
  714. // existing buffer. For 1-3 byte lengths we do 3 one-byte reads adjusted to
  715. // always read in-bounds without branching. Then we OR the size into the 4th
  716. // byte and use `WeakMix`.
  717. CARBON_MCA_BEGIN("dynamic-4b");
  718. if (size == 0) {
  719. buffer ^= StaticRandomData[0];
  720. } else {
  721. uint64_t data = Read1To3(data_ptr, size) | size << 24;
  722. buffer = WeakMix(data);
  723. }
  724. CARBON_MCA_END("dynamic-4b");
  725. return;
  726. }
  727. if (size <= 16) {
  728. CARBON_MCA_BEGIN("dynamic-16b");
  729. // Similar to the above, we optimize primarily for latency here and spread
  730. // the incoming data across both ends of the multiply. Note that this does
  731. // have a drawback -- any time one half of the mix function becomes zero it
  732. // will fail to incorporate any bits from the other half. However, there is
  733. // exactly 1 in 2^64 values for each side that achieve this, and only when
  734. // the size is exactly 16 -- for smaller sizes there is an overlapping byte
  735. // that makes this impossible unless the seed is *also* incredibly unlucky.
  736. //
  737. // Because this hash function makes no attempt to defend against hash
  738. // flooding, we accept this risk in order to keep the latency low. If this
  739. // becomes a non-flooding problem, we can restrict the size to <16 and send
  740. // the 16-byte case down the next tier of cost.
  741. uint64_t size_hash = SampleRandomData(size);
  742. auto data = Read8To16(data_ptr, size);
  743. buffer = Mix(data.first ^ size_hash, data.second ^ buffer);
  744. CARBON_MCA_END("dynamic-16b");
  745. return;
  746. }
  747. if (size <= 32) {
  748. CARBON_MCA_BEGIN("dynamic-32b");
  749. // Do two mixes of overlapping 16-byte ranges in parallel to minimize
  750. // latency. We also incorporate the size by sampling random data into the
  751. // seed before both.
  752. buffer ^= SampleRandomData(size);
  753. uint64_t m0 = Mix(Read8(data_ptr) ^ StaticRandomData[1],
  754. Read8(data_ptr + 8) ^ buffer);
  755. const std::byte* tail_16b_ptr = data_ptr + (size - 16);
  756. uint64_t m1 = Mix(Read8(tail_16b_ptr) ^ StaticRandomData[3],
  757. Read8(tail_16b_ptr + 8) ^ buffer);
  758. // Just an XOR mix at the end is quite weak here, but we prefer that for
  759. // latency over a more robust approach. Doing another mix with the size (the
  760. // way longer string hashing does) increases the latency on x86-64
  761. // significantly (approx. 20%).
  762. buffer = m0 ^ m1;
  763. CARBON_MCA_END("dynamic-32b");
  764. return;
  765. }
  766. HashSizedBytesLarge(bytes);
  767. }
  768. } // namespace Carbon
  769. #endif // CARBON_COMMON_HASHING_H_