value_store.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  5. #define CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_
  6. #include <type_traits>
  7. #include "common/check.h"
  8. #include "common/ostream.h"
  9. #include "common/set.h"
  10. #include "llvm/ADT/APFloat.h"
  11. #include "llvm/ADT/APInt.h"
  12. #include "llvm/ADT/Sequence.h"
  13. #include "llvm/ADT/SmallVector.h"
  14. #include "llvm/ADT/StringExtras.h"
  15. #include "llvm/Support/YAMLParser.h"
  16. #include "toolchain/base/index_base.h"
  17. #include "toolchain/base/mem_usage.h"
  18. #include "toolchain/base/yaml.h"
  19. namespace Carbon {
  20. // The value of a real literal token.
  21. //
  22. // This is either a dyadic fraction (mantissa * 2^exponent) or a decadic
  23. // fraction (mantissa * 10^exponent).
  24. //
  25. // These values are not canonicalized, because we don't expect them to repeat
  26. // and don't use them in SemIR values.
  27. class Real : public Printable<Real> {
  28. public:
  29. auto Print(llvm::raw_ostream& output_stream) const -> void {
  30. mantissa.print(output_stream, /*isSigned=*/false);
  31. output_stream << "*" << (is_decimal ? "10" : "2") << "^" << exponent;
  32. }
  33. // The mantissa, represented as an unsigned integer.
  34. llvm::APInt mantissa;
  35. // The exponent, represented as a signed integer.
  36. llvm::APInt exponent;
  37. // If false, the value is mantissa * 2^exponent.
  38. // If true, the value is mantissa * 10^exponent.
  39. // TODO: This field increases Real from 32 bytes to 40 bytes. Consider
  40. // changing how it's tracked for space savings.
  41. bool is_decimal;
  42. };
  43. // Corresponds to an integer value represented by an APInt. This is used both
  44. // for integer literal tokens, which are unsigned and have an unspecified
  45. // bit-width, and integer values in SemIR, which have a signedness and bit-width
  46. // matching their type.
  47. struct IntId : public IdBase, public Printable<IntId> {
  48. using ValueType = llvm::APInt;
  49. static const IntId Invalid;
  50. using IdBase::IdBase;
  51. auto Print(llvm::raw_ostream& out) const -> void {
  52. out << "int";
  53. IdBase::Print(out);
  54. }
  55. };
  56. constexpr IntId IntId::Invalid(IntId::InvalidIndex);
  57. // Corresponds to a float value represented by an APFloat. This is used for
  58. // floating-point values in SemIR.
  59. struct FloatId : public IdBase, public Printable<FloatId> {
  60. using ValueType = llvm::APFloat;
  61. static const FloatId Invalid;
  62. using IdBase::IdBase;
  63. auto Print(llvm::raw_ostream& out) const -> void {
  64. out << "float";
  65. IdBase::Print(out);
  66. }
  67. };
  68. constexpr FloatId FloatId::Invalid(FloatId::InvalidIndex);
  69. // Corresponds to a Real value.
  70. struct RealId : public IdBase, public Printable<RealId> {
  71. using ValueType = Real;
  72. static const RealId Invalid;
  73. using IdBase::IdBase;
  74. auto Print(llvm::raw_ostream& out) const -> void {
  75. out << "real";
  76. IdBase::Print(out);
  77. }
  78. };
  79. constexpr RealId RealId::Invalid(RealId::InvalidIndex);
  80. // Corresponds to StringRefs for identifiers.
  81. //
  82. // `NameId` relies on the values of this type other than `Invalid` all being
  83. // non-negative.
  84. struct IdentifierId : public IdBase, public Printable<IdentifierId> {
  85. using ValueType = llvm::StringRef;
  86. static const IdentifierId Invalid;
  87. using IdBase::IdBase;
  88. auto Print(llvm::raw_ostream& out) const -> void {
  89. out << "identifier";
  90. IdBase::Print(out);
  91. }
  92. };
  93. constexpr IdentifierId IdentifierId::Invalid(IdentifierId::InvalidIndex);
  94. // Corresponds to StringRefs for string literals.
  95. struct StringLiteralValueId : public IdBase,
  96. public Printable<StringLiteralValueId> {
  97. using ValueType = llvm::StringRef;
  98. static const StringLiteralValueId Invalid;
  99. using IdBase::IdBase;
  100. auto Print(llvm::raw_ostream& out) const -> void {
  101. out << "string";
  102. IdBase::Print(out);
  103. }
  104. };
  105. constexpr StringLiteralValueId StringLiteralValueId::Invalid(
  106. StringLiteralValueId::InvalidIndex);
  107. namespace Internal {
  108. // Used as a parent class for non-printable types. This is just for
  109. // std::conditional, not as an API.
  110. class ValueStoreNotPrintable {};
  111. } // namespace Internal
  112. // A simple wrapper for accumulating values, providing IDs to later retrieve the
  113. // value. This does not do deduplication.
  114. //
  115. // IdT::ValueType must represent the type being indexed.
  116. template <typename IdT>
  117. class ValueStore
  118. : public std::conditional<
  119. std::is_base_of_v<Printable<typename IdT::ValueType>,
  120. typename IdT::ValueType>,
  121. Yaml::Printable<ValueStore<IdT>>, Internal::ValueStoreNotPrintable> {
  122. public:
  123. using ValueType = typename IdT::ValueType;
  124. // Typically we want to use `ValueType&` and `const ValueType& to avoid
  125. // copies, but when the value type is a `StringRef`, we assume external
  126. // storage for the string data and both our value type and ref type will be
  127. // `StringRef`. This will preclude mutation of the string data.
  128. using RefType = std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
  129. llvm::StringRef, ValueType&>;
  130. using ConstRefType =
  131. std::conditional_t<std::same_as<llvm::StringRef, ValueType>,
  132. llvm::StringRef, const ValueType&>;
  133. // Stores the value and returns an ID to reference it.
  134. auto Add(ValueType value) -> IdT {
  135. IdT id(values_.size());
  136. // This routine is especially hot and the check here relatively expensive
  137. // for the value provided, so only do this in debug builds to make tracking
  138. // down issues easier.
  139. CARBON_DCHECK(id.index >= 0, "Id overflow");
  140. values_.push_back(std::move(value));
  141. return id;
  142. }
  143. // Adds a default constructed value and returns an ID to reference it.
  144. auto AddDefaultValue() -> IdT {
  145. IdT id(values_.size());
  146. values_.resize(id.index + 1);
  147. return id;
  148. }
  149. // Returns a mutable value for an ID.
  150. auto Get(IdT id) -> RefType {
  151. CARBON_DCHECK(id.index >= 0, "{0}", id);
  152. return values_[id.index];
  153. }
  154. // Returns the value for an ID.
  155. auto Get(IdT id) const -> ConstRefType {
  156. CARBON_DCHECK(id.index >= 0, "{0}", id);
  157. return values_[id.index];
  158. }
  159. // Reserves space.
  160. auto Reserve(size_t size) -> void { values_.reserve(size); }
  161. // These are to support printable structures, and are not guaranteed.
  162. auto OutputYaml() const -> Yaml::OutputMapping {
  163. return Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
  164. for (auto i : llvm::seq(values_.size())) {
  165. IdT id(i);
  166. map.Add(PrintToString(id), Yaml::OutputScalar(Get(id)));
  167. }
  168. });
  169. }
  170. // Collects memory usage of the values.
  171. auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
  172. -> void {
  173. mem_usage.Add(label.str(), values_);
  174. }
  175. auto array_ref() const -> llvm::ArrayRef<ValueType> { return values_; }
  176. auto size() const -> size_t { return values_.size(); }
  177. private:
  178. // Set inline size to 0 because these will typically be too large for the
  179. // stack, while this does make File smaller.
  180. llvm::SmallVector<std::decay_t<ValueType>, 0> values_;
  181. };
  182. // A wrapper for accumulating immutable values with deduplication, providing IDs
  183. // to later retrieve the value.
  184. //
  185. // IdT::ValueType must represent the type being indexed.
  186. template <typename IdT>
  187. class CanonicalValueStore {
  188. public:
  189. using ValueType = typename IdT::ValueType;
  190. using RefType = typename ValueStore<IdT>::RefType;
  191. using ConstRefType = typename ValueStore<IdT>::ConstRefType;
  192. // Stores a canonical copy of the value and returns an ID to reference it.
  193. auto Add(ValueType value) -> IdT;
  194. // Returns the value for an ID.
  195. auto Get(IdT id) const -> ConstRefType { return values_.Get(id); }
  196. // Looks up the canonical ID for a value, or returns invalid if not in the
  197. // store.
  198. auto Lookup(ValueType value) const -> IdT;
  199. // Reserves space.
  200. auto Reserve(size_t size) -> void;
  201. // These are to support printable structures, and are not guaranteed.
  202. auto OutputYaml() const -> Yaml::OutputMapping {
  203. return values_.OutputYaml();
  204. }
  205. auto array_ref() const -> llvm::ArrayRef<ValueType> {
  206. return values_.array_ref();
  207. }
  208. auto size() const -> size_t { return values_.size(); }
  209. // Collects memory usage of the values and deduplication set.
  210. auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
  211. -> void {
  212. mem_usage.Collect(MemUsage::ConcatLabel(label, "values_"), values_);
  213. auto bytes =
  214. set_.ComputeMetrics(KeyContext(values_.array_ref())).storage_bytes;
  215. mem_usage.Add(MemUsage::ConcatLabel(label, "set_"), bytes, bytes);
  216. }
  217. private:
  218. class KeyContext;
  219. ValueStore<IdT> values_;
  220. Set<IdT, /*SmallSize=*/0, KeyContext> set_;
  221. };
  222. template <typename IdT>
  223. class CanonicalValueStore<IdT>::KeyContext
  224. : public TranslatingKeyContext<KeyContext> {
  225. public:
  226. explicit KeyContext(llvm::ArrayRef<ValueType> values) : values_(values) {}
  227. // Note that it is safe to return a `const` reference here as the underlying
  228. // object's lifetime is provided by the `store_`.
  229. auto TranslateKey(IdT id) const -> const ValueType& {
  230. return values_[id.index];
  231. }
  232. private:
  233. llvm::ArrayRef<ValueType> values_;
  234. };
  235. template <typename IdT>
  236. auto CanonicalValueStore<IdT>::Add(ValueType value) -> IdT {
  237. auto make_key = [&] { return IdT(values_.Add(std::move(value))); };
  238. return set_.Insert(value, make_key, KeyContext(values_.array_ref())).key();
  239. }
  240. template <typename IdT>
  241. auto CanonicalValueStore<IdT>::Lookup(ValueType value) const -> IdT {
  242. if (auto result = set_.Lookup(value, KeyContext(values_.array_ref()))) {
  243. return result.key();
  244. }
  245. return IdT::Invalid;
  246. }
  247. template <typename IdT>
  248. auto CanonicalValueStore<IdT>::Reserve(size_t size) -> void {
  249. // Compute the resulting new insert count using the size of values -- the
  250. // set doesn't have a fast to compute current size.
  251. if (size > values_.size()) {
  252. set_.GrowForInsertCount(size - values_.size(),
  253. KeyContext(values_.array_ref()));
  254. }
  255. values_.Reserve(size);
  256. }
  257. using FloatValueStore = CanonicalValueStore<FloatId>;
  258. // Stores that will be used across compiler phases for a given compilation unit.
  259. // This is provided mainly so that they don't need to be passed separately.
  260. class SharedValueStores : public Yaml::Printable<SharedValueStores> {
  261. public:
  262. explicit SharedValueStores() = default;
  263. // Not copyable or movable.
  264. SharedValueStores(const SharedValueStores&) = delete;
  265. auto operator=(const SharedValueStores&) -> SharedValueStores& = delete;
  266. auto identifiers() -> CanonicalValueStore<IdentifierId>& {
  267. return identifiers_;
  268. }
  269. auto identifiers() const -> const CanonicalValueStore<IdentifierId>& {
  270. return identifiers_;
  271. }
  272. auto ints() -> CanonicalValueStore<IntId>& { return ints_; }
  273. auto ints() const -> const CanonicalValueStore<IntId>& { return ints_; }
  274. auto reals() -> ValueStore<RealId>& { return reals_; }
  275. auto reals() const -> const ValueStore<RealId>& { return reals_; }
  276. auto floats() -> FloatValueStore& { return floats_; }
  277. auto floats() const -> const FloatValueStore& { return floats_; }
  278. auto string_literal_values() -> CanonicalValueStore<StringLiteralValueId>& {
  279. return string_literals_;
  280. }
  281. auto string_literal_values() const
  282. -> const CanonicalValueStore<StringLiteralValueId>& {
  283. return string_literals_;
  284. }
  285. auto OutputYaml(std::optional<llvm::StringRef> filename = std::nullopt) const
  286. -> Yaml::OutputMapping {
  287. return Yaml::OutputMapping([&, filename](Yaml::OutputMapping::Map map) {
  288. if (filename) {
  289. map.Add("filename", *filename);
  290. }
  291. map.Add("shared_values",
  292. Yaml::OutputMapping([&](Yaml::OutputMapping::Map map) {
  293. map.Add("ints", ints_.OutputYaml());
  294. map.Add("reals", reals_.OutputYaml());
  295. map.Add("identifiers", identifiers_.OutputYaml());
  296. map.Add("strings", string_literals_.OutputYaml());
  297. }));
  298. });
  299. }
  300. // Collects memory usage for the various shared stores.
  301. auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
  302. -> void {
  303. mem_usage.Collect(MemUsage::ConcatLabel(label, "ints_"), ints_);
  304. mem_usage.Collect(MemUsage::ConcatLabel(label, "reals_"), reals_);
  305. mem_usage.Collect(MemUsage::ConcatLabel(label, "floats_"), floats_);
  306. mem_usage.Collect(MemUsage::ConcatLabel(label, "identifiers_"),
  307. identifiers_);
  308. mem_usage.Collect(MemUsage::ConcatLabel(label, "string_literals_"),
  309. string_literals_);
  310. }
  311. private:
  312. CanonicalValueStore<IntId> ints_;
  313. ValueStore<RealId> reals_;
  314. FloatValueStore floats_;
  315. CanonicalValueStore<IdentifierId> identifiers_;
  316. CanonicalValueStore<StringLiteralValueId> string_literals_;
  317. };
  318. } // namespace Carbon
  319. #endif // CARBON_TOOLCHAIN_BASE_VALUE_STORE_H_