  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_COMMON_HASHING_H_
  5. #define CARBON_COMMON_HASHING_H_
#include <array>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "common/check.h"
#include "common/ostream.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"

#ifdef __ARM_ACLE
#include <arm_acle.h>
#endif
  22. namespace Carbon {
  23. // A 64-bit hash code produced by `Carbon::HashValue`.
  24. //
  25. // This provides methods for extracting high-quality bits from the hash code
  26. // quickly.
  27. //
  28. // This class can also be a hashing input when recursively hashing more complex
  29. // data structures.
  30. class HashCode : public Printable<HashCode> {
  31. public:
  32. HashCode() = default;
  33. constexpr explicit HashCode(uint64_t value) : value_(value) {}
  34. friend constexpr auto operator==(HashCode lhs, HashCode rhs) -> bool {
  35. return lhs.value_ == rhs.value_;
  36. }
  37. friend constexpr auto operator!=(HashCode lhs, HashCode rhs) -> bool {
  38. return lhs.value_ != rhs.value_;
  39. }
  40. // Extracts an index from the hash code as a `ssize_t`. This index covers the
  41. // full range of that type, and may even be negative. Typical usage will
  42. // involve masking this down to some positive range using a bitand with a mask
  43. // computed from a power-of-two size. This routine doesn't do any masking to
  44. // ensure a positive index to avoid redundant computations with the typical
  45. // user of the index.
  46. constexpr auto ExtractIndex() -> ssize_t;
  47. // Extracts an index and a fixed `N`-bit tag from the hash code.
  48. //
  49. // This extracts these values from the position of the hash code which
  50. // maximizes the entropy in the tag and the low bits of the index, as typical
  51. // indices will be further masked down to fall in a smaller range.
  52. //
  53. // `N` must be in the range [1, 32]. The returned index will be in the range
  54. // [0, 2**(64-N)).
  55. template <int N>
  56. constexpr auto ExtractIndexAndTag() -> std::pair<ssize_t, uint32_t>;
  57. // Extract the full 64-bit hash code as an integer.
  58. //
  59. // The methods above should be preferred rather than directly manipulating
  60. // this integer. This is provided primarily to enable Merkle-tree hashing or
  61. // other recursive hashing where that is needed or more efficient.
  62. explicit operator uint64_t() const { return value_; }
  63. auto Print(llvm::raw_ostream& out) const -> void {
  64. out << llvm::formatv("{0:x16}", value_);
  65. }
  66. private:
  67. uint64_t value_ = 0;
  68. };
// Computes a hash code for the provided value, incorporating the provided seed.
//
// The seed doesn't need to be of any particular high quality, but a zero seed
// has bad effects in several places. Prefer the unseeded routine rather than
// providing a zero here.
//
// This **not** a cryptographically secure or stable hash -- it is only designed
// for use with in-memory hash table style data structures. Being fast and
// effective for that use case is the guiding principle of its design.
//
// There is no guarantee that the values produced are stable from execution to
// execution. For speed and quality reasons, the implementation does not
// introduce any variance to defend against accidental dependencies. As a
// consequence, it is strongly encouraged to use a seed that varies from
// execution to execution to avoid depending on specific values produced.
//
// The algorithm used is most heavily based on [Abseil's hashing algorithm][1],
// with some additional ideas and inspiration from the fallback hashing
// algorithm in [Rust's AHash][2] and the [FxHash][3] function. However, there
// are also *significant* changes introduced here.
//
// [1]: https://github.com/abseil/abseil-cpp/tree/master/absl/hash/internal
// [2]: https://github.com/tkaitchuck/aHash/wiki/AHash-fallback-algorithm
// [3]: https://docs.rs/fxhash/latest/fxhash/
//
// This hash algorithm does *not* defend against hash flooding. While it can be
// viewed as "keyed" on the seed, it is expected to be possible to craft inputs
// for some data types that cancel out the seed used and manufacture endlessly
// colliding sets of keys. In general, this function works to be *fast* for hash
// tables. If you need to defend against hash flooding, either directly use a
// data structure with strong worst-case guarantees, or a hash table which
// detects catastrophic collisions and falls back to such a data structure.
//
// This hash function is heavily optimized for *latency* over *quality*. Modern
// hash tables designs can efficiently handle reasonable collision rates,
// including using extra bits from the hash to avoid all efficiency coming from
// the same low bits. Because of this, low-latency is significantly more
// important for performance than high-quality, and this is heavily leveraged.
// The result is that the hash codes produced *do* have significant avalanche
// problems for small keys. The upside is that the latency for hashing integers,
// pointers, and small byte strings (up to 32-bytes) is exceptionally low, and
// essentially a small constant time instruction sequence.
//
// No exotic instruction set extensions are required, and the state used is
// small. It does rely on being able to get the low- and high-64-bit results of
// a 64-bit multiply efficiently.
//
// The function supports many typical data types such as primitives, string-ish
// views, and types composing primitives transparently like pairs, tuples, and
// array-ish views. It is also extensible to support user-defined types.
//
// The builtin support for string-like types include:
// - `std::string_view`
// - `std::string`
// - `llvm::StringRef`
// - `llvm::SmallString`
//
// This function supports heterogeneous lookup between all of the string-like
// types. It also supports heterogeneous lookup between pointer types regardless
// of pointee type and `nullptr`.
//
// However, these are the only heterogeneous lookup support including for the
// builtin in, standard, and LLVM types. Notably, each different size and
// signedness integer type may hash differently for efficiency reasons. Hash
// tables should pick a single integer type in which to manage keys and do
// lookups.
//
// To add support for your type, you need to implement a customization point --
// a free function that can be found by ADL for your type -- called
// `CarbonHashValue` with the following signature:
//
// ```cpp
// auto CarbonHashValue(const YourType& value, uint64_t seed) -> HashCode;
// ```
//
// The extension point needs to ensure that values that compare equal (including
// any comparisons with different types that might be used with a hash table of
// `YourType` keys) produce the same `HashCode` values.
//
// `HashCode` values should typically be produced using the `Hasher` helper type
// below. See its documentation for more details about implementing these
// customization points and how best to incorporate the value's state into a
// `HashCode`.
//
// For two input values that are almost but not quite equal, the extension
// point should maximize the probability of each bit of their resulting
// `HashCode`s differing. More formally, `HashCode`s should exhibit an
// [avalanche effect][4]. However, while this is desirable, it should be
// **secondary** to low latency. The intended use case of these functions is not
// cryptography but in-memory hashtables where the latency and overhead of
// computing the `HashCode` is *significantly* more important than achieving a
// particularly high quality. The goal is to have "just enough" avalanche
// effect, but there is not a fixed criteria for how much is enough. That should
// be determined through practical experimentation with a hashtable and
// distribution of keys.
//
// [4]: https://en.wikipedia.org/wiki/Avalanche_effect
template <typename T>
inline auto HashValue(const T& value, uint64_t seed) -> HashCode;

// The same as the seeded version of `HashValue` but without callers needing to
// provide a seed.
//
// Generally prefer the seeded version, but this is available if there is no
// reasonable seed. In particular, this will behave better than using a seed of
// `0`. One important use case is for recursive hashing of sub-objects where
// appropriate or needed.
template <typename T>
inline auto HashValue(const T& value) -> HashCode;
// Object and APIs that eventually produce a hash code.
//
// This type is primarily used by types to implement a customization point
// `CarbonHashValue` that will in turn be used by the `HashValue` function. See
// the `HashValue` function for details of that extension point.
//
// The methods on this type can be used to incorporate data from your
// user-defined type into its internal state which can be converted to a
// `HashCode` at any time. These methods will only produce the same `HashCode`
// if they are called in the exact same order with the same arguments -- there
// are no guaranteed equivalences between calling different methods.
//
// Example usage:
// ```cpp
// auto CarbonHashValue(const MyType& value, uint64_t seed) -> HashCode {
//   Hasher hasher(seed);
//   hasher.Hash(value.x, value.y);
//   return static_cast<HashCode>(hasher);
// }
// ```
//
// This type's API also reflects the reality that high-performance hash tables
// are used with keys that are generally small and cheap to hash.
//
// To ensure this type's code is optimized effectively, it should typically be
// used as a local variable and not passed across function boundaries
// unnecessarily.
//
// The type also provides a number of static helper functions and static data
// members that may be used by authors of `CarbonHashValue` implementations to
// efficiently compute the inputs to the core `Hasher` methods, or even to
// manually do some amounts of hashing in performance-tuned ways outside of the
// methods provided.
class Hasher {
 public:
  // NOTE(review): default construction leaves `buffer` uninitialized (no
  // member initializer below) -- confirm that default-constructed hashers are
  // always written before being read out as a `HashCode`.
  Hasher() = default;
  explicit Hasher(uint64_t seed) : buffer(seed) {}

  // Movable but not copyable.
  Hasher(Hasher&& arg) = default;
  Hasher(const Hasher& arg) = delete;
  auto operator=(Hasher&& rhs) -> Hasher& = default;

  // Extracts the current state as a `HashCode` for use.
  explicit operator HashCode() const { return HashCode(buffer); }

  // Incorporates a variable number of objects into the `hasher`s state.
  //
  // The `values` here can be anything hashable, and this routine handles
  // recursively hashing a single value as appropriate and then in turn
  // incorporating that. However, it is optimized for relatively small numbers
  // of values and/or small elements. A large tree structure will be better
  // handled by a dedicated Merkle-tree decomposition rather than the ad-hoc one
  // provided here. This routine's decomposition is mostly useful for combining
  // N small bits of data with one recursively hashed entity.
  //
  // There is no guaranteed correspondence between the behavior of a single call
  // with multiple parameters and multiple calls.
  template <typename... Ts>
  auto Hash(const Ts&... values) -> void;

  // Incorporates an array of objects into the hasher's state.
  //
  // Similar to the variadic `Hash`, this will handle recursively hashing if
  // necessary, but is optimized to avoid it when possible and is especially
  // efficient when hashing a raw array of bytes.
  //
  // Note that this is especially inefficient when it must recursively hash for
  // long arrays -- that pattern should be avoided if possible. It is usually
  // more effective to optimize that pattern at a higher level with a dedicated
  // hashing implementation.
  template <typename T>
  auto HashArray(llvm::ArrayRef<T> values) -> void;

  // Incorporates an object into the hasher's state by hashing its object
  // representation. Requires `value`'s type to have a unique object
  // representation. This is primarily useful for builtin and primitive types.
  //
  // This can be directly used for simple users combining some aggregation of
  // objects. However, when possible, prefer the variadic version below for
  // aggregating several primitive types into a hash.
  template <typename T>
    requires std::has_unique_object_representations_v<T>
  auto HashRaw(const T& value) -> void;

  // Simpler and more primitive functions to incorporate state represented in
  // `uint64_t` values into the hasher's state.
  //
  // These may be slightly less efficient than the `Hash` method above for a
  // typical application code `uint64_t`, but are designed to work well even
  // when relevant data has been packed into the `uint64_t` parameters densely.
  auto HashDense(uint64_t data) -> void;
  auto HashDense(uint64_t data0, uint64_t data1) -> void;

  // A heavily optimized routine for incorporating a dynamically sized sequence
  // of bytes into the hasher's state.
  //
  // This routine has carefully structured inline code paths for short byte
  // sequences and a reasonably high bandwidth code path for longer sequences.
  // The size of the byte sequence is always incorporated into the hasher's
  // state along with the contents.
  auto HashSizedBytes(llvm::ArrayRef<std::byte> bytes) -> void;

  // Incorporate a dynamically sized sequence of bytes represented as an array
  // of objects into the hasher's state.
  template <typename T>
    requires std::has_unique_object_representations_v<T>
  auto HashSizedBytes(llvm::ArrayRef<T> data) -> void {
    // Reinterpreting the element storage as raw bytes is valid because the
    // constraint above requires a unique object representation.
    HashSizedBytes(llvm::ArrayRef<std::byte>(
        reinterpret_cast<const std::byte*>(data.data()),
        data.size() * sizeof(T)));
  }

  // An out-of-line, throughput-optimized routine for incorporating a
  // dynamically sized sequence when the sequence size is guaranteed to be >32.
  // The size is always incorporated into the state.
  auto HashSizedBytesLarge(llvm::ArrayRef<std::byte> bytes) -> void;

  // Utility functions to read data of various sizes efficiently into a
  // 64-bit value. These pointers need-not be aligned, and can alias other
  // objects. The representation of the read data in the `uint64_t` returned is
  // not stable or guaranteed.
  static auto Read1(const std::byte* data) -> uint64_t;
  static auto Read2(const std::byte* data) -> uint64_t;
  static auto Read4(const std::byte* data) -> uint64_t;
  static auto Read8(const std::byte* data) -> uint64_t;

  // Similar to the `ReadN` functions, but supports reading a range of different
  // bytes provided by the size *without branching on the size*. The lack of
  // branches is often key, and the code in these routines works to be efficient
  // in extracting a *dynamic* size of bytes into the returned `uint64_t`. There
  // may be overlap between different routines, because these routines are based
  // on different implementation techniques that do have some overlap in the
  // range of sizes they can support. Which routine is the most efficient for a
  // size in the overlap isn't trivial, and so these primitives are provided
  // as-is and should be selected based on the localized generated code and
  // benchmarked performance.
  static auto Read1To3(const std::byte* data, ssize_t size) -> uint64_t;
  static auto Read4To8(const std::byte* data, ssize_t size) -> uint64_t;
  static auto Read8To16(const std::byte* data, ssize_t size)
      -> std::pair<uint64_t, uint64_t>;

  // Reads the underlying object representation of a type into a 64-bit integer
  // efficiently. Only supports types with unique object representation and at
  // most 8-bytes large. This is typically used to read primitive types.
  template <typename T>
    requires std::has_unique_object_representations_v<T> && (sizeof(T) <= 8)
  static auto ReadSmall(const T& value) -> uint64_t;

  // The core of the hash algorithm is this mix function. The specific
  // operations are not guaranteed to be stable but are described here for
  // hashing authors to understand what to expect.
  //
  // Currently, this uses the same "mix" operation as in Abseil, AHash, and
  // several other hashing algorithms. It takes two 64-bit integers, and
  // multiplies them, capturing both the high 64-bit result and the low 64-bit
  // result, and then XOR-ing those two halves together.
  //
  // A consequence of this operation is that a zero on either side will fail to
  // incorporate any bits from the other side. Often, this is an acceptable rate
  // of collision in practice. But it is worth being aware of and working to
  // avoid common paths encountering this. For example, naively used this might
  // cause different length all-zero byte strings to hash the same, essentially
  // losing the length in the composition of the hash for a likely important
  // case of byte sequence.
  //
  // Another consequence of the particular implementation is that it is useful
  // to have a reasonable distribution of bits throughout both sides of the
  // multiplication. However, it is not *necessary* as we do capture the
  // complete 128-bit result. Where reasonable, the caller should XOR random
  // data into operands before calling `Mix` to try and increase the
  // distribution of bits feeding the multiply.
  static auto Mix(uint64_t lhs, uint64_t rhs) -> uint64_t;

  // An alternative to `Mix` that is significantly weaker but also lower
  // latency. It should not be used when the input `uint64_t` is densely packed
  // with data, but is a good option for hashing a single integer or pointer
  // where the full 64-bits are sparsely populated and especially the high bits
  // are often invariant between interestingly different values.
  //
  // This uses just the low 64-bit result of a multiply. It ensures the operand
  // is good at diffusing bits, but inherently the high bits of the input will
  // be (significantly) less often represented in the output. It also does some
  // reversal to ensure the *low* bits of the result are the most useful ones.
  static auto WeakMix(uint64_t value) -> uint64_t;

  // We have a 64-byte random data pool designed to fit on a single cache line.
  // This routine allows sampling it at byte indices, which allows getting 64 -
  // 8 different random 64-bit results. The offset must be in the range [0, 56).
  static auto SampleRandomData(ssize_t offset) -> uint64_t {
    // The strict `<` enforces offset < 56 so the 8-byte read below stays
    // entirely within the 64-byte pool.
    CARBON_DCHECK(offset + sizeof(uint64_t) < sizeof(StaticRandomData));
    uint64_t data;
    // `memcpy` performs the potentially-unaligned read without UB.
    memcpy(&data,
           reinterpret_cast<const unsigned char*>(&StaticRandomData) + offset,
           sizeof(data));
    return data;
  }

  // As above, but for small offsets, we can use aligned loads, which are
  // faster. The offset must be in the range [0, 8).
  static auto SampleAlignedRandomData(ssize_t offset) -> uint64_t {
    CARBON_DCHECK(static_cast<size_t>(offset) <
                  sizeof(StaticRandomData) / sizeof(uint64_t));
    return StaticRandomData[offset];
  }

  // Random data taken from the hexadecimal digits of Pi's fractional component,
  // written in lexical order for convenience of reading. The resulting
  // byte-stream will be different due to little-endian integers. These can be
  // used directly for convenience rather than calling `SampleRandomData`, but
  // be aware that this is the underlying pool. The goal is to reuse the same
  // single cache-line of constant data.
  //
  // The initializers here can be generated with the following shell script,
  // which will generate 8 64-bit values and one more digit. The `bc` command's
  // decimal based scaling means that without getting at least some extra hex
  // digits rendered there will be rounding that we don't want so the script
  // below goes on to produce one more hex digit ensuring the 8 initializers
  // aren't rounded in any way. Using a higher scale won't cause the 8
  // initializers here to change further.
  //
  // ```sh
  // echo 'obase=16; scale=155; 4*a(1)' | env BC_LINE_LENGTH=500 bc -l \
  //   | cut -c 3- | tr '[:upper:]' '[:lower:]' \
  //   | sed -e "s/.\{4\}/&'/g" \
  //   | sed -e "s/\(.\{4\}'.\{4\}'.\{4\}'.\{4\}\)'/0x\1,\n/g"
  // ```
  alignas(64) static constexpr std::array<uint64_t, 8> StaticRandomData = {
      0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0,
      0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377, 0xbe54'66cf'34e9'0c6c,
      0xc0ac'29b7'c97c'50dd, 0x3f84'd5b5'b547'0917,
  };

  // We need a multiplicative hashing constant for both 64-bit multiplicative
  // hashing fast paths and some other 128-bit folded multiplies. We use an
  // empirically better constant compared to Knuth's, Rust's FxHash, and others
  // we've tried. It was found by a search of uniformly distributed odd numbers
  // and examining them for desirable properties when used as a multiplicative
  // hash, however our search seems largely to have been lucky rather than
  // having a highly effective set of criteria. We evaluated this constant by
  // integrating this hash function with a hashtable and looking at the
  // collision rates of several different but very fundamental patterns of keys:
  // integers counting from 0, pointers allocated on the heap, and strings with
  // character and size distributions matching C-style ASCII identifiers.
  // Different constants found with this search worked better or less well, but
  // fairly consistently across the different types of keys. At the end, far and
  // away the best behaved constant we found was one of the first ones in the
  // search and is what we use here.
  //
  // For reference, some other constants include one derived by diving 2^64 by
  // Phi: 0x9e37'79b9'7f4a'7c15U -- see these sites for details:
  // https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
  // https://book.huihoo.com/data-structures-and-algorithms-with-object-oriented-design-patterns-in-c++/html/page214.html
  //
  // Another very good constant derived by minimizing repeating bit patterns is
  // 0xdcb2'2ca6'8cb1'34edU and its bit-reversed form. However, this constant
  // has observed frequent issues at roughly 4k pointer keys, connected to a
  // common hashtable seed also being a pointer. These issues appear to occur
  // both more often and have a larger impact relative to the number of keys
  // than the rare cases where some combinations of pointer seeds and pointer
  // keys create minor quality issues with the constant we use.
  static constexpr uint64_t MulConstant = 0x7924'f9e0'de1e'8cf5U;

 private:
  // The single 64-bit word of hashing state, read out via the `HashCode`
  // conversion above.
  uint64_t buffer;
};
// A dedicated namespace for `CarbonHashValue` overloads that are not found by
// ADL with their associated types. For example, primitive type overloads or
// overloads for types in LLVM's libraries.
//
// Note that these are internal implementation details and **not** part of the
// public API. They should not be used directly by client code.
  429. namespace InternalHashDispatch {
  430. template <typename T>
  431. inline auto CarbonHashValue(llvm::ArrayRef<T> values, uint64_t seed)
  432. -> HashCode {
  433. Hasher hasher(seed);
  434. hasher.HashArray(values);
  435. return static_cast<HashCode>(hasher);
  436. }
  437. inline auto CarbonHashValue(llvm::ArrayRef<std::byte> bytes, uint64_t seed)
  438. -> HashCode {
  439. Hasher hasher(seed);
  440. hasher.HashSizedBytes(bytes);
  441. return static_cast<HashCode>(hasher);
  442. }
  443. // Hashing implementation for `llvm::StringRef`. We forward all the other
  444. // string-like types that support heterogeneous lookup to this one.
  445. inline auto CarbonHashValue(llvm::StringRef value, uint64_t seed) -> HashCode {
  446. return CarbonHashValue(
  447. llvm::ArrayRef(reinterpret_cast<const std::byte*>(value.data()),
  448. value.size()),
  449. seed);
  450. }
  451. inline auto CarbonHashValue(std::string_view value, uint64_t seed) -> HashCode {
  452. return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
  453. }
  454. inline auto CarbonHashValue(const std::string& value, uint64_t seed)
  455. -> HashCode {
  456. return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
  457. }
  458. template <unsigned Length>
  459. inline auto CarbonHashValue(const llvm::SmallString<Length>& value,
  460. uint64_t seed) -> HashCode {
  461. return CarbonHashValue(llvm::StringRef(value.data(), value.size()), seed);
  462. }
  463. // Support types that are array-like by building an `llvm::ArrayRef` out of
  464. // them. We can't do this by accepting any type convertible to an `ArrayRef`
  465. // because that type supports building a synthetic array out of any single
  466. // element.
  467. template <typename T>
  468. inline auto CarbonHashValue(const std::vector<T>& arg, uint64_t seed)
  469. -> HashCode {
  470. return CarbonHashValue(llvm::ArrayRef(arg), seed);
  471. }
  472. template <typename T>
  473. inline auto CarbonHashValue(const llvm::SmallVectorImpl<T>& arg, uint64_t seed)
  474. -> HashCode {
  475. return CarbonHashValue(llvm::ArrayRef(arg), seed);
  476. }
  477. template <typename T, size_t N>
  478. inline auto CarbonHashValue(const std::array<T, N>& arg, uint64_t seed)
  479. -> HashCode {
  480. return CarbonHashValue(llvm::ArrayRef(arg), seed);
  481. }
  482. template <typename T, size_t N>
  483. inline auto CarbonHashValue(const T (&arg)[N], uint64_t seed) -> HashCode {
  484. return CarbonHashValue(llvm::ArrayRef(arg), seed);
  485. }
  486. inline auto CarbonHashValue(llvm::APInt value, uint64_t seed) -> HashCode {
  487. Hasher hasher(seed);
  488. if (LLVM_LIKELY(value.isSingleWord())) {
  489. hasher.Hash(value.getBitWidth(), value.getZExtValue());
  490. } else {
  491. hasher.HashRaw(value.getBitWidth());
  492. hasher.HashSizedBytes(
  493. llvm::ArrayRef(value.getRawData(), value.getNumWords()));
  494. }
  495. return static_cast<HashCode>(hasher);
  496. }
  497. inline auto CarbonHashValue(llvm::APFloat value, uint64_t seed) -> HashCode {
  498. Hasher hasher(seed);
  499. // Hashing floating point numbers is complex and depends on the specific
  500. // internal semantics of `APFloat`, so delegate to the LLVM hashing framework
  501. // here. We re-hash the result to mix in our seed. All of this is a bit
  502. // inefficient, and we can revisit this to provide a dedicated implementation
  503. // if it becomes a bottleneck.
  504. using llvm::hash_value;
  505. hasher.HashRaw(hash_value(value));
  506. return static_cast<HashCode>(hasher);
  507. }
  508. template <typename... Ts>
  509. inline auto CarbonHashValue(const std::tuple<Ts...>& value, uint64_t seed)
  510. -> HashCode {
  511. Hasher hasher(seed);
  512. std::apply([&](const auto&... args) { hasher.Hash(args...); }, value);
  513. return static_cast<HashCode>(hasher);
  514. }
  515. template <typename T, typename U>
  516. inline auto CarbonHashValue(const std::pair<T, U>& value, uint64_t seed)
  517. -> HashCode {
  518. Hasher hasher(seed);
  519. hasher.Hash(value.first, value.second);
  520. return static_cast<HashCode>(hasher);
  521. }
// Implementation detail predicate to detect if there is a `CarbonHashValue`
// overload available for a particular type, either in this namespace or found
// via ADL. Note that this should not be moved above any overloads: unqualified
// lookup from this point only sees overloads declared earlier in this
// namespace, so moving it up would hide those overloads from the predicate.
template <typename T>
concept HasCarbonHashValue = requires(const T& value, uint64_t seed) {
  { CarbonHashValue(value, seed) } -> std::same_as<HashCode>;
};

// C++ guarantees this is true for the unsigned variants, but we require it for
// signed variants and pointers. These invariants let the fixed-width integer
// and pointer types be hashed as raw data below.
static_assert(std::has_unique_object_representations_v<int8_t>);
static_assert(std::has_unique_object_representations_v<int16_t>);
static_assert(std::has_unique_object_representations_v<int32_t>);
static_assert(std::has_unique_object_representations_v<int64_t>);
static_assert(std::has_unique_object_representations_v<void*>);
// Overloaded function to provide mappings or conversions required to types that
// should be hashed as plain data but where we can't directly examine the
// storage.
//
// For example, C++ uses `std::nullptr_t` but unfortunately doesn't make it have
// a unique object representation. To address that, we need a function that
// converts `nullptr` back into a `void*` that will have a unique object
// representation. And this needs to be done by-value as we need to build a
// temporary object to return, which requires a separate overload rather than
// just using a type function that could be used in parallel in the predicate
// below. Instead, we build the predicate independently of the mapping overload,
// but together they should produce the correct result.

// Identity mapping: most raw-data types hash their own storage directly.
template <typename T>
inline auto MapToRawDataType(const T& value) -> const T& {
  // This overload should never be selected for `std::nullptr_t`, so
  // static_assert to get some better compiler error messages.
  static_assert(!std::same_as<T, std::nullptr_t>);
  // NOLINTNEXTLINE(bugprone-return-const-ref-from-parameter)
  return value;
}

// Special case: map `nullptr` to a `const void*`, which does have a unique
// object representation, so it can be hashed as raw data.
inline auto MapToRawDataType(std::nullptr_t /*value*/) -> const void* {
  return nullptr;
}
// Implementation detail predicate to detect if we can hash as a raw data type.
// When used, it should be combined with our mapping function `MapToRawDataType`
// to handle any necessary edge cases that don't directly work.
//
// `std::nullptr_t` is explicitly included because, while it lacks a unique
// object representation itself, `MapToRawDataType` converts it to a hashable
// `const void*`.
template <typename T>
concept CanHashAsRawDataType = std::same_as<T, std::nullptr_t> ||
                               std::has_unique_object_representations_v<T>;
// Implementation of the unqualified dispatch to any provided `CarbonHashValue`
// overloads, either here, or via ADL. Note that similar to
// `HasCarbonHashValue`, this must not be moved above any of those overloads.
template <typename T>
inline auto DispatchImpl(const T& value, uint64_t seed) -> HashCode {
  // If we have an explicit overload for `CarbonHashValue`, call it. This may be
  // provided above or via ADL, and is preferred as it represents an explicit
  // request for how the type is hashed.
  if constexpr (HasCarbonHashValue<T>) {
    return CarbonHashValue(value, seed);
  } else if constexpr (CanHashAsRawDataType<T>) {
    // There was no explicit overload to call, but the type allows us to hash it
    // as raw data, do so.
    Hasher hasher(seed);
    hasher.HashRaw(MapToRawDataType(value));
    return static_cast<HashCode>(hasher);
  } else {
    // We can only synthesize hashing for types that are hashable as raw data.
    // This type isn't so fail a static assert due to the lack of an overload.
    // We use the concept here to try and get the best diagnostics we can about
    // candidates. The condition is known-false in this branch, but spelling it
    // with the concept keeps the assertion type-dependent and descriptive.
    static_assert(HasCarbonHashValue<T>,
                  "Attempted to hash a type which does not have a "
                  "`CarbonHashValue` overload.");
  }
}
  590. } // namespace InternalHashDispatch
// Computes a hash code for `value`, chained from the provided `seed`.
template <typename T>
inline auto HashValue(const T& value, uint64_t seed) -> HashCode {
  return InternalHashDispatch::DispatchImpl(value, seed);
}

// Computes a hash code for `value` using a fixed, internally chosen seed.
template <typename T>
inline auto HashValue(const T& value) -> HashCode {
  // When a seed isn't provided, use the last 64-bit chunk of random data. Other
  // chunks (especially the first) are more often XOR-ed with the seed and risk
  // cancelling each other out and feeding a zero to a `Mix` call in a way that
  // sharply increases collisions.
  return HashValue(value, Hasher::StaticRandomData[7]);
}
// Extracts the entire hash code as an index value.
constexpr auto HashCode::ExtractIndex() -> ssize_t { return value_; }

// Splits the hash code into an index (the bits above `N`) and an `N`-bit tag
// (the low `N` bits). `N` must be in the range [1, 32).
template <int N>
constexpr auto HashCode::ExtractIndexAndTag() -> std::pair<ssize_t, uint32_t> {
  static_assert(N >= 1);
  static_assert(N < 32);
  return {static_cast<ssize_t>(value_ >> N),
          static_cast<uint32_t>(value_ & ((1U << N) - 1))};
}
// Building with `-DCARBON_MCA_MARKERS` will enable `llvm-mca` annotations in
// the source code. These can interfere with optimization, but allows analyzing
// the generated `.s` file with the `llvm-mca` tool. Documentation for these
// markers is here:
// https://llvm.org/docs/CommandGuide/llvm-mca.html#using-markers-to-analyze-specific-code-blocks
#if CARBON_MCA_MARKERS
#define CARBON_MCA_BEGIN(NAME) \
  __asm volatile("# LLVM-MCA-BEGIN " NAME "" ::: "memory");
#define CARBON_MCA_END(NAME) \
  __asm volatile("# LLVM-MCA-END " NAME "" ::: "memory");
#else
// When markers are disabled, the macros expand to nothing so they have zero
// effect on codegen.
#define CARBON_MCA_BEGIN(NAME)
#define CARBON_MCA_END(NAME)
#endif
  625. inline auto Hasher::Read1(const std::byte* data) -> uint64_t {
  626. uint8_t result;
  627. std::memcpy(&result, data, sizeof(result));
  628. return result;
  629. }
  630. inline auto Hasher::Read2(const std::byte* data) -> uint64_t {
  631. uint16_t result;
  632. std::memcpy(&result, data, sizeof(result));
  633. return result;
  634. }
  635. inline auto Hasher::Read4(const std::byte* data) -> uint64_t {
  636. uint32_t result;
  637. std::memcpy(&result, data, sizeof(result));
  638. return result;
  639. }
  640. inline auto Hasher::Read8(const std::byte* data) -> uint64_t {
  641. uint64_t result;
  642. std::memcpy(&result, data, sizeof(result));
  643. return result;
  644. }
// Reads 1 to 3 bytes from `data`, packed into a `uint64_t`.
inline auto Hasher::Read1To3(const std::byte* data, ssize_t size) -> uint64_t {
  // Use carefully crafted indexing to avoid branches on the exact size while
  // reading. All three indices -- 0, `size - 1`, and `size >> 1` -- are in
  // bounds for every size in [1, 3]; for sizes below 3 some bytes are read
  // twice, which is harmless because the packing stays injective for each
  // fixed size and callers mix in the size separately.
  uint64_t byte0 = static_cast<uint8_t>(data[0]);
  uint64_t byte1 = static_cast<uint8_t>(data[size - 1]);
  uint64_t byte2 = static_cast<uint8_t>(data[size >> 1]);
  // Pack the middle byte into bits [0, 8), the first into [8, 16), and the
  // last into [16, 24), leaving bits 24+ free for callers.
  return (byte0 << 8) | (byte1 << 16) | byte2;
}

// Reads 4 to 8 bytes from `data` using two possibly-overlapping 4-byte reads
// anchored at the front and the back of the range.
inline auto Hasher::Read4To8(const std::byte* data, ssize_t size) -> uint64_t {
  uint32_t low;
  std::memcpy(&low, data, sizeof(low));
  uint32_t high;
  std::memcpy(&high, data + size - sizeof(high), sizeof(high));
  return (static_cast<uint64_t>(low) << 32) | high;
}

// Reads 8 to 16 bytes from `data` using two possibly-overlapping 8-byte reads
// anchored at the front and the back of the range.
inline auto Hasher::Read8To16(const std::byte* data, ssize_t size)
    -> std::pair<uint64_t, uint64_t> {
  uint64_t low;
  std::memcpy(&low, data, sizeof(low));
  uint64_t high;
  std::memcpy(&high, data + size - sizeof(high), sizeof(high));
  return {low, high};
}
// Mixes two 64-bit values by folding their full 128-bit product: the high and
// low halves of the product are XOR-ed together so every bit of both inputs
// can influence the result.
inline auto Hasher::Mix(uint64_t lhs, uint64_t rhs) -> uint64_t {
  // Use the C23 extended integer support that Clang provides as a general
  // language extension.
  using U128 = unsigned _BitInt(128);
  U128 result = static_cast<U128>(lhs) * static_cast<U128>(rhs);
  return static_cast<uint64_t>(result) ^ static_cast<uint64_t>(result >> 64);
}

// A lower-latency but lower-quality mix of a single value: one multiply
// followed by a bit or byte reversal to move the better-mixed high bits down.
inline auto Hasher::WeakMix(uint64_t value) -> uint64_t {
  value *= MulConstant;
#ifdef __ARM_ACLE
  // Arm has a fast bit-reversal that gives us the optimal distribution.
  value = __rbitll(value);
#else
  // Otherwise, assume an optimized BSWAP such as x86's. That's close enough.
  value = __builtin_bswap64(value);
#endif
  return value;
}
// Incorporates a single densely-packed 64-bit value into the hash state.
inline auto Hasher::HashDense(uint64_t data) -> void {
  // When hashing exactly one 64-bit entity use the Phi-derived constant as this
  // is just multiplicative hashing. The initial buffer is mixed on input to
  // pipeline with materializing the constant.
  buffer = Mix(data ^ buffer, MulConstant);
}

// Incorporates two densely-packed 64-bit values into the hash state with a
// single `Mix` for low latency.
inline auto Hasher::HashDense(uint64_t data0, uint64_t data1) -> void {
  // When hashing two chunks of data at the same time, we XOR it with random
  // data to avoid common inputs from having especially bad multiplicative
  // effects. We also XOR in the starting buffer as seed or to chain. Note that
  // we don't use *consecutive* random data 64-bit values to avoid a common
  // compiler "optimization" of loading both 64-bit chunks into a 128-bit vector
  // and doing the XOR in the vector unit. The latency of extracting the data
  // afterward eclipses any benefit. Callers will routinely have two consecutive
  // data values here, but using non-consecutive keys avoids any vectorization
  // being tempting.
  //
  // XOR-ing both the incoming state and a random word over the second data is
  // done to pipeline with materializing the constants and is observed to have
  // better performance than XOR-ing after the mix.
  //
  // This roughly matches the mix pattern used in the larger mixing routines
  // from Abseil, which is a more minimal form than used in other algorithms
  // such as AHash and seems adequate for latency-optimized use cases.
  buffer =
      Mix(data0 ^ StaticRandomData[1], data1 ^ StaticRandomData[3] ^ buffer);
}
// Reads the object representation of `value` (at most 8 bytes) into the low
// bytes of a `uint64_t`, never reading outside the object's storage.
template <typename T>
  requires std::has_unique_object_representations_v<T> && (sizeof(T) <= 8)
inline auto Hasher::ReadSmall(const T& value) -> uint64_t {
  const auto* storage = reinterpret_cast<const std::byte*>(&value);
  if constexpr (sizeof(T) == 1) {
    return Read1(storage);
  } else if constexpr (sizeof(T) == 2) {
    return Read2(storage);
  } else if constexpr (sizeof(T) == 3) {
    // Combine a 2-byte read with a 1-byte read of the final byte.
    return Read2(storage) | (Read1(&storage[2]) << 16);
  } else if constexpr (sizeof(T) == 4) {
    return Read4(storage);
  } else if constexpr (sizeof(T) == 5) {
    // Combine a 4-byte read with a 1-byte read of the final byte.
    return Read4(storage) | (Read1(&storage[4]) << 32);
  } else if constexpr (sizeof(T) == 6 || sizeof(T) == 7) {
    // Use overlapping 4-byte reads for 6 and 7 bytes.
    return Read4(storage) | (Read4(&storage[sizeof(T) - 4]) << 32);
  } else if constexpr (sizeof(T) == 8) {
    return Read8(storage);
  } else {
    // Unreachable given the constraint above; keep a dependent assert for
    // clearer diagnostics if the constraint ever changes.
    static_assert(sizeof(T) <= 8);
  }
}
// Incorporates any number of values into the hash state, dispatching each
// value the same way the top-level `HashValue` would, then folding the
// per-value results together.
template <typename... Ts>
inline auto Hasher::Hash(const Ts&... values) -> void {
  if constexpr (sizeof...(Ts) == 0) {
    // With no values, still perturb the state by XOR-ing in random data.
    buffer ^= StaticRandomData[0];
    return;
  }
  using InternalHashDispatch::CanHashAsRawDataType;
  using InternalHashDispatch::HasCarbonHashValue;
  using InternalHashDispatch::MapToRawDataType;
  // Special-case a single element tuple that we will hash as raw data.
  if constexpr (sizeof...(Ts) == 1 && (... && (!HasCarbonHashValue<Ts> &&
                                               CanHashAsRawDataType<Ts>))) {
    HashRaw(MapToRawDataType(values)...);
    return;
  }
  // Map each value into a uint64_t, either by hashing it using any custom hash
  // function required, reading its data into a 64-bit value, or if large
  // hashing it as raw data and using that hash code as the 64-bit data. This
  // mirrors the logic in `InternalHashDispatch::DispatchImpl`, but minimizes
  // early hashing of anything small we can just read as data. While this may be
  // a little bit wasteful in some cases, collapsing down to a flat array of
  // 64-bit integers is more efficient to hash.
  auto map_value = []<typename T>(const T& value) -> uint64_t {
    if constexpr (HasCarbonHashValue<T>) {
      // Use the top-level `HashValue` to re-dispatch to the custom
      // implementation with a fixed seed.
      return static_cast<uint64_t>(HashValue(value));
    } else if constexpr (CanHashAsRawDataType<T>) {
      auto raw_value = MapToRawDataType(value);
      if constexpr (sizeof(raw_value) <= 8) {
        return ReadSmall(raw_value);
      } else {
        // Use the top-level `HashValue` to pick up a good fixed seed and hash
        // this large object as raw data.
        return static_cast<uint64_t>(HashValue(raw_value));
      }
    } else {
      // We can only synthesize hashing for types that are hashable as raw data.
      // This type isn't so fail a static assert due to the lack of an overload.
      // We use the concept here to try and get the best diagnostics we can
      // about candidates.
      static_assert(HasCarbonHashValue<T>,
                    "Attempted to hash a type which does not have a "
                    "`CarbonHashValue` overload.");
    }
  };
  const uint64_t data[] = {map_value(values)...};
  // Two values can use the dedicated dense-pair routine; otherwise hash the
  // flat array of mapped values as raw data.
  if constexpr (sizeof...(Ts) == 2) {
    HashDense(data[0], data[1]);
    return;
  }
  HashRaw(data);
}
// Incorporates every element of `values` into the hash state, along with the
// array's size.
template <typename T>
inline auto Hasher::HashArray(llvm::ArrayRef<T> values) -> void {
  using InternalHashDispatch::CanHashAsRawDataType;
  using InternalHashDispatch::HasCarbonHashValue;
  // This logic similarly mirrors `InternalHashDispatch::DispatchImpl`, but is
  // specialized here to allow us to efficiently process the array when it
  // *doesn't* require recursive hashing.
  if constexpr (HasCarbonHashValue<T>) {
    // Use a trivial loop to give consistent behavior for arrays requiring
    // recursive hashing. This isn't terribly efficient, but if clients care
    // they should specialize the entire hashing operation. For simple, tiny
    // cases, this avoids an awkward functionality cliff.
    for (const T& value : values) {
      HashDense(static_cast<uint64_t>(HashValue(value)));
    }
    // Mix in the element count so arrays of different lengths differ even
    // when one is a prefix of the other.
    HashRaw(values.size());
  } else if constexpr (std::has_unique_object_representations_v<T>) {
    // This code is a narrow special case for `CanHashAsRawDataType` that we can
    // further hash the underlying storage directly. We check that it is a
    // subset.
    static_assert(CanHashAsRawDataType<T>);
    HashSizedBytes(values);
  } else {
    // We can only synthesize hashing for types that are hashable as raw data.
    // This type isn't so fail a static assert due to the lack of an overload.
    // We use the concept here to try and get the best diagnostics we can
    // about candidates.
    static_assert(HasCarbonHashValue<T>,
                  "Attempted to hash a type which does not have a "
                  "`CarbonHashValue` overload.");
  }
}
// Incorporates the raw object representation of `value` into the hash state,
// with size-specialized paths for objects of 8, 16, and 32 bytes or fewer.
template <typename T>
  requires std::has_unique_object_representations_v<T>
inline auto Hasher::HashRaw(const T& value) -> void {
  if constexpr (sizeof(T) <= 8) {
    // For types size 8-bytes and smaller directly being hashed (as opposed to
    // 8-bytes potentially bit-packed with data), we rarely expect the incoming
    // data to fully and densely populate all 8 bytes. For these cases we have a
    // `WeakMix` routine that is lower latency but lower quality.
    CARBON_MCA_BEGIN("fixed-8b");
    buffer = WeakMix(buffer ^ ReadSmall(value));
    CARBON_MCA_END("fixed-8b");
    return;
  }
  const auto* data_ptr = reinterpret_cast<const std::byte*>(&value);
  if constexpr (8 < sizeof(T) && sizeof(T) <= 16) {
    CARBON_MCA_BEGIN("fixed-16b");
    // Two overlapping 8-byte reads cover the whole object; mix both halves.
    auto values = Read8To16(data_ptr, sizeof(T));
    HashDense(values.first, values.second);
    CARBON_MCA_END("fixed-16b");
    return;
  }
  if constexpr (16 < sizeof(T) && sizeof(T) <= 32) {
    CARBON_MCA_BEGIN("fixed-32b");
    // Essentially the same technique used for dynamically sized byte sequences
    // of this size, but we start with a fixed XOR of random data.
    buffer ^= StaticRandomData[0];
    uint64_t m0 = Mix(Read8(data_ptr) ^ StaticRandomData[1],
                      Read8(data_ptr + 8) ^ buffer);
    const std::byte* tail_16b_ptr = data_ptr + (sizeof(T) - 16);
    uint64_t m1 = Mix(Read8(tail_16b_ptr) ^ StaticRandomData[3],
                      Read8(tail_16b_ptr + 8) ^ buffer);
    buffer = m0 ^ m1;
    CARBON_MCA_END("fixed-32b");
    return;
  }
  // Hashing the size isn't relevant here, but is harmless, so fall back to a
  // common code path.
  HashSizedBytesLarge(llvm::ArrayRef<std::byte>(data_ptr, sizeof(T)));
}
  860. inline auto Hasher::HashSizedBytes(llvm::ArrayRef<std::byte> bytes) -> void {
  861. const std::byte* data_ptr = bytes.data();
  862. const ssize_t size = bytes.size();
  863. // First handle short sequences under 8 bytes. We distribute the branches a
  864. // bit for short strings.
  865. if (size <= 8) {
  866. if (size >= 4) {
  867. CARBON_MCA_BEGIN("dynamic-8b");
  868. uint64_t data = Read4To8(data_ptr, size);
  869. // We optimize for latency on short strings by hashing both the data and
  870. // size in a single multiply here, using the small nature of size to
  871. // sample a specific sequence of bytes with well distributed bits into one
  872. // side of the multiply. This results in a *statistically* weak hash
  873. // function, but one with very low latency.
  874. //
  875. // Note that we don't drop to the `WeakMix` routine here because we want
  876. // to use sampled random data to encode the size, which may not be as
  877. // effective without the full 128-bit folded result.
  878. buffer = Mix(data ^ buffer, SampleAlignedRandomData(size - 1));
  879. CARBON_MCA_END("dynamic-8b");
  880. return;
  881. }
  882. // When we only have 0-3 bytes of string, we can avoid the cost of `Mix`.
  883. // Instead, for empty strings we can just XOR some of our data against the
  884. // existing buffer. For 1-3 byte lengths we do 3 one-byte reads adjusted to
  885. // always read in-bounds without branching. Then we OR the size into the 4th
  886. // byte and use `WeakMix`.
  887. CARBON_MCA_BEGIN("dynamic-4b");
  888. if (size == 0) {
  889. buffer ^= StaticRandomData[0];
  890. } else {
  891. uint64_t data = Read1To3(data_ptr, size) | size << 24;
  892. buffer = WeakMix(data);
  893. }
  894. CARBON_MCA_END("dynamic-4b");
  895. return;
  896. }
  897. if (size <= 16) {
  898. CARBON_MCA_BEGIN("dynamic-16b");
  899. // Similar to the above, we optimize primarily for latency here and spread
  900. // the incoming data across both ends of the multiply. Note that this does
  901. // have a drawback -- any time one half of the mix function becomes zero it
  902. // will fail to incorporate any bits from the other half. However, there is
  903. // exactly 1 in 2^64 values for each side that achieve this, and only when
  904. // the size is exactly 16 -- for smaller sizes there is an overlapping byte
  905. // that makes this impossible unless the seed is *also* incredibly unlucky.
  906. //
  907. // Because this hash function makes no attempt to defend against hash
  908. // flooding, we accept this risk in order to keep the latency low. If this
  909. // becomes a non-flooding problem, we can restrict the size to <16 and send
  910. // the 16-byte case down the next tier of cost.
  911. uint64_t size_hash = SampleRandomData(size);
  912. auto data = Read8To16(data_ptr, size);
  913. buffer = Mix(data.first ^ size_hash, data.second ^ buffer);
  914. CARBON_MCA_END("dynamic-16b");
  915. return;
  916. }
  917. if (size <= 32) {
  918. CARBON_MCA_BEGIN("dynamic-32b");
  919. // Do two mixes of overlapping 16-byte ranges in parallel to minimize
  920. // latency. We also incorporate the size by sampling random data into the
  921. // seed before both.
  922. buffer ^= SampleRandomData(size);
  923. uint64_t m0 = Mix(Read8(data_ptr) ^ StaticRandomData[1],
  924. Read8(data_ptr + 8) ^ buffer);
  925. const std::byte* tail_16b_ptr = data_ptr + (size - 16);
  926. uint64_t m1 = Mix(Read8(tail_16b_ptr) ^ StaticRandomData[3],
  927. Read8(tail_16b_ptr + 8) ^ buffer);
  928. // Just an XOR mix at the end is quite weak here, but we prefer that for
  929. // latency over a more robust approach. Doing another mix with the size (the
  930. // way longer string hashing does) increases the latency on x86-64
  931. // significantly (approx. 20%).
  932. buffer = m0 ^ m1;
  933. CARBON_MCA_END("dynamic-32b");
  934. return;
  935. }
  936. HashSizedBytesLarge(bytes);
  937. }
  938. } // namespace Carbon
  939. #endif // CARBON_COMMON_HASHING_H_