tree_and_subtrees.cpp 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parse/tree_and_subtrees.h"
  5. namespace Carbon::Parse {
  6. TreeAndSubtrees::TreeAndSubtrees(const Lex::TokenizedBuffer& tokens,
  7. const Tree& tree)
  8. : tokens_(&tokens), tree_(&tree) {
  9. subtree_sizes_.reserve(tree_->size());
  10. // A stack of nodes which haven't yet been used as children.
  11. llvm::SmallVector<NodeId> size_stack;
  12. for (auto n : tree.postorder()) {
  13. // Nodes always include themselves.
  14. int32_t size = 1;
  15. auto kind = tree.node_kind(n);
  16. if (kind.has_child_count()) {
  17. // When the child count is set, remove the specific number from the stack.
  18. CARBON_CHECK(static_cast<int32_t>(size_stack.size()) >=
  19. kind.child_count())
  20. << "Need " << kind.child_count() << " children for " << kind
  21. << ", have " << size_stack.size() << " available";
  22. for (auto i : llvm::seq(kind.child_count())) {
  23. auto child = size_stack.pop_back_val();
  24. CARBON_CHECK((size_t)child.index < subtree_sizes_.size());
  25. size += subtree_sizes_[child.index];
  26. if (kind.has_bracket() && i == kind.child_count() - 1) {
  27. CARBON_CHECK(kind.bracket() == tree.node_kind(child))
  28. << "Node " << kind << " needs bracket " << kind.bracket()
  29. << ", found wrong bracket " << tree.node_kind(child);
  30. }
  31. }
  32. } else {
  33. while (true) {
  34. CARBON_CHECK(!size_stack.empty())
  35. << "Node " << kind << " is missing bracket " << kind.bracket();
  36. auto child = size_stack.pop_back_val();
  37. size += subtree_sizes_[child.index];
  38. if (kind.bracket() == tree.node_kind(child)) {
  39. break;
  40. }
  41. }
  42. }
  43. size_stack.push_back(n);
  44. subtree_sizes_.push_back(size);
  45. }
  46. CARBON_CHECK(static_cast<int>(subtree_sizes_.size()) == tree_->size());
  47. // Remaining nodes should all be roots in the tree; make sure they line up.
  48. CARBON_CHECK(size_stack.back().index ==
  49. static_cast<int32_t>(tree_->size()) - 1)
  50. << size_stack.back() << " " << tree_->size() - 1;
  51. int prev_index = -1;
  52. for (const auto& n : size_stack) {
  53. CARBON_CHECK(n.index - subtree_sizes_[n.index] == prev_index)
  54. << "NodeId " << n << " is a root " << tree_->node_kind(n)
  55. << " with subtree_size " << subtree_sizes_[n.index]
  56. << ", but previous root was at " << prev_index << ".";
  57. prev_index = n.index;
  58. }
  59. }
  60. auto TreeAndSubtrees::VerifyExtract(NodeId node_id, NodeKind kind,
  61. ErrorBuilder* trace) const -> bool {
  62. switch (kind) {
  63. #define CARBON_PARSE_NODE_KIND(Name) \
  64. case NodeKind::Name: \
  65. return VerifyExtractAs<Name>(node_id, trace).has_value();
  66. #include "toolchain/parse/node_kind.def"
  67. }
  68. }
  69. auto TreeAndSubtrees::Verify() const -> ErrorOr<Success> {
  70. // Validate that each node extracts successfully when not marked as having an
  71. // error.
  72. //
  73. // Without this code, a 10 mloc test case of lex & parse takes 4.129 s ± 0.041
  74. // s. With this additional verification, it takes 5.768 s ± 0.036 s.
  75. for (NodeId n : tree_->postorder()) {
  76. if (tree_->node_has_error(n)) {
  77. continue;
  78. }
  79. auto node_kind = tree_->node_kind(n);
  80. if (!VerifyExtract(n, node_kind, nullptr)) {
  81. ErrorBuilder trace;
  82. trace << llvm::formatv(
  83. "NodeId #{0} couldn't be extracted as a {1}. Trace:\n", n, node_kind);
  84. VerifyExtract(n, node_kind, &trace);
  85. return trace;
  86. }
  87. }
  88. // Validate the roots. Also ensures Tree::ExtractFile() doesn't error.
  89. if (!TryExtractNodeFromChildren<File>(NodeId::Invalid, roots(), nullptr)) {
  90. ErrorBuilder trace;
  91. trace << "Roots of tree couldn't be extracted as a `File`. Trace:\n";
  92. TryExtractNodeFromChildren<File>(NodeId::Invalid, roots(), &trace);
  93. return trace;
  94. }
  95. return Success();
  96. }
  97. auto TreeAndSubtrees::postorder(NodeId n) const
  98. -> llvm::iterator_range<Tree::PostorderIterator> {
  99. // The postorder ends after this node, the root, and begins at the start of
  100. // its subtree.
  101. int start_index = n.index - subtree_sizes_[n.index] + 1;
  102. return Tree::PostorderIterator::MakeRange(NodeId(start_index), n);
  103. }
  104. auto TreeAndSubtrees::children(NodeId n) const
  105. -> llvm::iterator_range<SiblingIterator> {
  106. CARBON_CHECK(n.is_valid());
  107. int end_index = n.index - subtree_sizes_[n.index];
  108. return llvm::iterator_range<SiblingIterator>(
  109. SiblingIterator(*this, NodeId(n.index - 1)),
  110. SiblingIterator(*this, NodeId(end_index)));
  111. }
  112. auto TreeAndSubtrees::roots() const -> llvm::iterator_range<SiblingIterator> {
  113. return llvm::iterator_range<SiblingIterator>(
  114. SiblingIterator(*this,
  115. NodeId(static_cast<int>(subtree_sizes_.size()) - 1)),
  116. SiblingIterator(*this, NodeId(-1)));
  117. }
  118. auto TreeAndSubtrees::PrintNode(llvm::raw_ostream& output, NodeId n, int depth,
  119. bool preorder) const -> bool {
  120. output.indent(2 * (depth + 2));
  121. output << "{";
  122. // If children are being added, include node_index in order to disambiguate
  123. // nodes.
  124. if (preorder) {
  125. output << "node_index: " << n << ", ";
  126. }
  127. output << "kind: '" << tree_->node_kind(n) << "', text: '"
  128. << tokens_->GetTokenText(tree_->node_token(n)) << "'";
  129. if (tree_->node_has_error(n)) {
  130. output << ", has_error: yes";
  131. }
  132. if (subtree_sizes_[n.index] > 1) {
  133. output << ", subtree_size: " << subtree_sizes_[n.index];
  134. if (preorder) {
  135. output << ", children: [\n";
  136. return true;
  137. }
  138. }
  139. output << "}";
  140. return false;
  141. }
  142. auto TreeAndSubtrees::Print(llvm::raw_ostream& output) const -> void {
  143. output << "- filename: " << tokens_->source().filename() << "\n"
  144. << " parse_tree: [\n";
  145. // Walk the tree just to calculate depths for each node.
  146. llvm::SmallVector<int> indents;
  147. indents.resize(subtree_sizes_.size(), 0);
  148. llvm::SmallVector<std::pair<NodeId, int>, 16> node_stack;
  149. for (NodeId n : roots()) {
  150. node_stack.push_back({n, 0});
  151. }
  152. while (!node_stack.empty()) {
  153. NodeId n = NodeId::Invalid;
  154. int depth;
  155. std::tie(n, depth) = node_stack.pop_back_val();
  156. for (NodeId sibling_n : children(n)) {
  157. indents[sibling_n.index] = depth + 1;
  158. node_stack.push_back({sibling_n, depth + 1});
  159. }
  160. }
  161. for (NodeId n : tree_->postorder()) {
  162. PrintNode(output, n, indents[n.index], /*preorder=*/false);
  163. output << ",\n";
  164. }
  165. output << " ]\n";
  166. }
  167. auto TreeAndSubtrees::PrintPreorder(llvm::raw_ostream& output) const -> void {
  168. output << "- filename: " << tokens_->source().filename() << "\n"
  169. << " parse_tree: [\n";
  170. // The parse tree is stored in postorder. The preorder can be constructed
  171. // by reversing the order of each level of siblings within an RPO. The
  172. // sibling iterators are directly built around RPO and so can be used with a
  173. // stack to produce preorder.
  174. // The roots, like siblings, are in RPO (so reversed), but we add them in
  175. // order here because we'll pop off the stack effectively reversing then.
  176. llvm::SmallVector<std::pair<NodeId, int>, 16> node_stack;
  177. for (NodeId n : roots()) {
  178. node_stack.push_back({n, 0});
  179. }
  180. while (!node_stack.empty()) {
  181. NodeId n = NodeId::Invalid;
  182. int depth;
  183. std::tie(n, depth) = node_stack.pop_back_val();
  184. if (PrintNode(output, n, depth, /*preorder=*/true)) {
  185. // Has children, so we descend. We append the children in order here as
  186. // well because they will get reversed when popped off the stack.
  187. for (NodeId sibling_n : children(n)) {
  188. node_stack.push_back({sibling_n, depth + 1});
  189. }
  190. continue;
  191. }
  192. int next_depth = node_stack.empty() ? 0 : node_stack.back().second;
  193. CARBON_CHECK(next_depth <= depth) << "Cannot have the next depth increase!";
  194. for (int close_children_count : llvm::seq(0, depth - next_depth)) {
  195. (void)close_children_count;
  196. output << "]}";
  197. }
  198. // We always end with a comma and a new line as we'll move to the next
  199. // node at whatever the current level ends up being.
  200. output << " ,\n";
  201. }
  202. output << " ]\n";
  203. }
  204. auto TreeAndSubtrees::CollectMemUsage(MemUsage& mem_usage,
  205. llvm::StringRef label) const -> void {
  206. mem_usage.Add(MemUsage::ConcatLabel(label, "subtree_sizes_"), subtree_sizes_);
  207. }
  208. auto TreeAndSubtrees::SiblingIterator::Print(llvm::raw_ostream& output) const
  209. -> void {
  210. output << node_;
  211. }
  212. } // namespace Carbon::Parse