hashing.cpp 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  // Exceptions. See /LICENSE for license information.
  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "common/hashing.h"
  5. namespace Carbon {
  6. auto Hasher::HashSizedBytesLarge(llvm::ArrayRef<std::byte> bytes) -> void {
  7. const std::byte* data_ptr = bytes.data();
  8. const ssize_t size = bytes.size();
  9. CARBON_DCHECK(size > 32);
  10. // If we have 64 bytes or more, we're going to handle two 32-byte chunks at a
  11. // time using a simplified version of the main algorithm. This is based
  12. // heavily on the 64-byte and larger processing approach used by Abseil. The
  13. // goal is to mix the input data using as few multiplies (or other operations)
  14. // as we can and with as much [ILP][1] as we can. The ILP comes largely from
  15. // creating parallel structures to the operations.
  16. //
  17. // [1]: https://en.wikipedia.org/wiki/Instruction-level_parallelism
  18. auto mix32 = [](const std::byte* data_ptr, uint64_t buffer, uint64_t random0,
  19. uint64_t random1) {
  20. uint64_t a = Read8(data_ptr);
  21. uint64_t b = Read8(data_ptr + 8);
  22. uint64_t c = Read8(data_ptr + 16);
  23. uint64_t d = Read8(data_ptr + 24);
  24. uint64_t m0 = Mix(a ^ random0, b ^ buffer);
  25. uint64_t m1 = Mix(c ^ random1, d ^ buffer);
  26. return (m0 ^ m1);
  27. };
  28. // Prefetch the first bytes into cache.
  29. __builtin_prefetch(data_ptr, 0 /* read */, 0 /* discard after next use */);
  30. uint64_t buffer0 = buffer ^ StaticRandomData[0];
  31. uint64_t buffer1 = buffer ^ StaticRandomData[2];
  32. const std::byte* tail_32b_ptr = data_ptr + (size - 32);
  33. const std::byte* tail_16b_ptr = data_ptr + (size - 16);
  34. const std::byte* end_ptr = data_ptr + (size - 64);
  35. while (data_ptr < end_ptr) {
  36. // Prefetch the next 64-bytes while we process the current 64-bytes.
  37. __builtin_prefetch(data_ptr + 64, 0 /* read */,
  38. 0 /* discard after next use */);
  39. buffer0 =
  40. mix32(data_ptr, buffer0, StaticRandomData[4], StaticRandomData[5]);
  41. buffer1 =
  42. mix32(data_ptr + 32, buffer1, StaticRandomData[6], StaticRandomData[7]);
  43. data_ptr += 64;
  44. }
  45. // If we haven't reached our 32-byte tail pointer, consume another 32-bytes
  46. // directly.
  47. if (data_ptr < tail_32b_ptr) {
  48. buffer0 =
  49. mix32(data_ptr, buffer0, StaticRandomData[4], StaticRandomData[5]);
  50. data_ptr += 32;
  51. }
  52. if (data_ptr < tail_16b_ptr) {
  53. // We have more than 16-bytes in the tail so use a full 32-byte mix from the
  54. // 32-byte tail pointer.
  55. buffer1 =
  56. mix32(tail_32b_ptr, buffer1, StaticRandomData[6], StaticRandomData[7]);
  57. } else {
  58. // 16-bytes or less in the tail, do something more minimal instead of a full
  59. // 32-byte mix. As this only involves a single multiply, we don't decompose
  60. // further even when the tail is (much) shorter.
  61. buffer1 = Mix(Read8(tail_16b_ptr) ^ StaticRandomData[6],
  62. Read8(tail_16b_ptr + 8) ^ buffer1);
  63. }
  64. buffer = buffer0 ^ buffer1;
  65. HashDense(size);
  66. }
  67. } // namespace Carbon