ソースを参照

Split Parse out to its own target. (#3556)

This mirrors the structure of codegen/codegen.h, lower/lower.h, and
check/check.h. I recently did lex/lex.h, so parse/parse.h is the last one
remaining. With this change, the main API file in each of these directories is
eponymous with its directory.

I could've used a friend function to avoid making the Tree constructor
public, but in other places we make less use of `friend` and just leave
things public. Making the constructor public felt more consistent, and it is
simple because it only affects the constructor.
Jon Ross-Perkins 2 年 前
コミット
c8b30d3eec

+ 1 - 0
language_server/BUILD

@@ -25,6 +25,7 @@ cc_binary(
         "//toolchain/diagnostics:null_diagnostics",
         "//toolchain/lex",
         "//toolchain/lex:tokenized_buffer",
+        "//toolchain/parse",
         "//toolchain/parse:node_kind",
         "//toolchain/parse:tree",
         "//toolchain/source:source_buffer",

+ 2 - 2
language_server/language_server.cpp

@@ -9,7 +9,7 @@
 #include "toolchain/diagnostics/null_diagnostics.h"
 #include "toolchain/lex/lex.h"
 #include "toolchain/parse/node_kind.h"
-#include "toolchain/parse/tree.h"
+#include "toolchain/parse/parse.h"
 #include "toolchain/source/source_buffer.h"
 
 namespace Carbon::LS {
@@ -101,7 +101,7 @@ void LanguageServer::OnDocumentSymbol(
 
   auto buf = SourceBuffer::CreateFromFile(vfs, file, NullDiagnosticConsumer());
   auto lexed = Lex::Lex(value_stores, *buf, NullDiagnosticConsumer());
-  auto parsed = Parse::Tree::Parse(lexed, NullDiagnosticConsumer(), nullptr);
+  auto parsed = Parse::Parse(lexed, NullDiagnosticConsumer(), nullptr);
   std::vector<clang::clangd::DocumentSymbol> result;
   for (const auto& node : parsed.postorder()) {
     clang::clangd::SymbolKind symbol_kind;

+ 1 - 1
toolchain/driver/BUILD

@@ -28,7 +28,7 @@ cc_library(
         "//toolchain/diagnostics:sorting_diagnostic_consumer",
         "//toolchain/lex",
         "//toolchain/lower",
-        "//toolchain/parse:tree",
+        "//toolchain/parse",
         "//toolchain/sem_ir:file",
         "//toolchain/sem_ir:formatter",
         "//toolchain/source:source_buffer",

+ 3 - 3
toolchain/driver/driver.cpp

@@ -23,7 +23,7 @@
 #include "toolchain/diagnostics/sorting_diagnostic_consumer.h"
 #include "toolchain/lex/lex.h"
 #include "toolchain/lower/lower.h"
-#include "toolchain/parse/tree.h"
+#include "toolchain/parse/parse.h"
 #include "toolchain/sem_ir/formatter.h"
 #include "toolchain/source/source_buffer.h"
 
@@ -438,8 +438,8 @@ class Driver::CompilationUnit {
   auto RunParse() -> bool {
     CARBON_CHECK(tokens_);
 
-    LogCall("Parse::Tree::Parse", [&] {
-      parse_tree_ = Parse::Tree::Parse(*tokens_, *consumer_, vlog_stream_);
+    LogCall("Parse::Parse", [&] {
+      parse_tree_ = Parse::Parse(*tokens_, *consumer_, vlog_stream_);
     });
     if (options_.dump_parse_tree) {
       consumer_->Flush();

+ 33 - 13
toolchain/parse/BUILD

@@ -37,6 +37,7 @@ cc_test(
     srcs = ["typed_nodes_test.cpp"],
     deps = [
         ":node_kind",
+        ":parse",
         ":tree",
         "//testing/base:gtest_main",
         "//toolchain/diagnostics:diagnostic_emitter",
@@ -47,6 +48,35 @@ cc_test(
     ],
 )
 
+cc_library(
+    name = "parse",
+    srcs = [
+        "context.cpp",
+        "context.h",
+        "parse.cpp",
+    ] +
+    # Glob handler files to avoid missing any.
+    glob([
+        "handle_*.cpp",
+    ]),
+    hdrs = ["parse.h"],
+    deps = [
+        ":node_kind",
+        ":precedence",
+        ":state",
+        ":tree",
+        "//common:check",
+        "//common:ostream",
+        "//common:vlog",
+        "//toolchain/base:pretty_stack_trace_function",
+        "//toolchain/base:value_store",
+        "//toolchain/diagnostics:diagnostic_emitter",
+        "//toolchain/lex:token_kind",
+        "//toolchain/lex:tokenized_buffer",
+        "@llvm-project//llvm:Support",
+    ],
+)
+
 cc_library(
     name = "state",
     srcs = ["state.cpp"],
@@ -58,29 +88,18 @@ cc_library(
 cc_library(
     name = "tree",
     srcs = [
-        "context.cpp",
-        "context.h",
         "extract.cpp",
         "tree.cpp",
-    ] +
-    # Glob handler files to avoid missing any.
-    glob([
-        "handle_*.cpp",
-    ]),
+    ],
     hdrs = ["tree.h"],
     deps = [
         ":node_kind",
-        ":precedence",
-        ":state",
         "//common:check",
         "//common:error",
         "//common:ostream",
         "//common:struct_reflection",
-        "//common:vlog",
         "//toolchain/base:pretty_stack_trace_function",
-        "//toolchain/base:value_store",
         "//toolchain/diagnostics:diagnostic_emitter",
-        "//toolchain/lex:token_kind",
         "//toolchain/lex:tokenized_buffer",
         "@llvm-project//llvm:Support",
     ],
@@ -92,6 +111,7 @@ cc_test(
     srcs = ["tree_test.cpp"],
     deps = [
         ":node_kind",
+        ":parse",
         ":tree",
         "//common:ostream",
         "//testing/base:gtest_main",
@@ -113,7 +133,7 @@ cc_fuzz_test(
     srcs = ["parse_fuzzer.cpp"],
     corpus = glob(["fuzzer_corpus/*"]),
     deps = [
-        ":tree",
+        ":parse",
         "//common:check",
         "//toolchain/base:value_store",
         "//toolchain/diagnostics:diagnostic_emitter",

+ 61 - 0
toolchain/parse/parse.cpp

@@ -0,0 +1,61 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "common/check.h"
+#include "toolchain/base/pretty_stack_trace_function.h"
+#include "toolchain/parse/context.h"
+#include "toolchain/parse/node_kind.h"
+#include "toolchain/parse/typed_nodes.h"
+
+namespace Carbon::Parse {
+
+auto HandleInvalid(Context& context) -> void {
+  CARBON_FATAL() << "The Invalid state shouldn't be on the stack: "
+                 << context.PopState();
+}
+
+auto Parse(Lex::TokenizedBuffer& tokens, DiagnosticConsumer& consumer,
+           llvm::raw_ostream* vlog_stream) -> Tree {
+  Lex::TokenLocationTranslator translator(&tokens);
+  Lex::TokenDiagnosticEmitter emitter(translator, consumer);
+
+  // Delegate to the parser.
+  Tree tree(tokens);
+  Context context(tree, tokens, emitter, vlog_stream);
+  PrettyStackTraceFunction context_dumper(
+      [&](llvm::raw_ostream& output) { context.PrintForStackDump(output); });
+
+  context.AddLeafNode(NodeKind::FileStart,
+                      context.ConsumeChecked(Lex::TokenKind::FileStart));
+
+  context.PushState(State::DeclScopeLoop);
+
+  while (!context.state_stack().empty()) {
+    // clang warns on unhandled enum values; clang-tidy is incorrect here.
+    // NOLINTNEXTLINE(bugprone-switch-missing-default-case)
+    switch (context.state_stack().back().state) {
+#define CARBON_PARSE_STATE(Name) \
+  case State::Name:              \
+    Handle##Name(context);       \
+    break;
+#include "toolchain/parse/state.def"
+    }
+  }
+
+  context.AddLeafNode(NodeKind::FileEnd, *context.position());
+
+  if (auto verify = tree.Verify(); !verify.ok()) {
+    // TODO: This is temporarily printing to stderr directly during development.
+    // If we can, restrict this to a subtree with the error and add it to the
+    // stack trace (such as with PrettyStackTraceFunction). Otherwise, switch
+    // back to vlog_stream prior to broader distribution so that end users are
+    // hopefully comfortable copy-pasting stderr when there are bugs in tree
+    // construction.
+    tree.Print(llvm::errs());
+    CARBON_FATAL() << "Invalid tree returned by Parse(): " << verify.error();
+  }
+  return tree;
+}
+
+}  // namespace Carbon::Parse

+ 23 - 0
toolchain/parse/parse.h

@@ -0,0 +1,23 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef CARBON_TOOLCHAIN_PARSE_PARSE_H_
+#define CARBON_TOOLCHAIN_PARSE_PARSE_H_
+
+#include "common/ostream.h"
+#include "toolchain/diagnostics/diagnostic_emitter.h"
+#include "toolchain/lex/tokenized_buffer.h"
+#include "toolchain/parse/tree.h"
+
+namespace Carbon::Parse {
+
+// Parses the token buffer into a `Tree`.
+//
+// This is the factory function which is used to build parse trees.
+auto Parse(Lex::TokenizedBuffer& tokens, DiagnosticConsumer& consumer,
+           llvm::raw_ostream* vlog_stream) -> Tree;
+
+}  // namespace Carbon::Parse
+
+#endif  // CARBON_TOOLCHAIN_PARSE_PARSE_H_

+ 2 - 2
toolchain/parse/parse_fuzzer.cpp

@@ -9,7 +9,7 @@
 #include "toolchain/base/value_store.h"
 #include "toolchain/diagnostics/null_diagnostics.h"
 #include "toolchain/lex/lex.h"
-#include "toolchain/parse/tree.h"
+#include "toolchain/parse/parse.h"
 
 namespace Carbon::Testing {
 
@@ -40,7 +40,7 @@ extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
 
   // Now parse it into a tree. Note that parsing will (when asserts are enabled)
   // walk the entire tree to verify it so we don't have to do that here.
-  Parse::Tree::Parse(tokens, NullDiagnosticConsumer(), /*vlog_stream=*/nullptr);
+  Parse::Parse(tokens, NullDiagnosticConsumer(), /*vlog_stream=*/nullptr);
   return 0;
 }
 

+ 0 - 49
toolchain/parse/tree.cpp

@@ -10,60 +10,11 @@
 #include "llvm/ADT/SmallVector.h"
 #include "toolchain/base/pretty_stack_trace_function.h"
 #include "toolchain/lex/tokenized_buffer.h"
-#include "toolchain/parse/context.h"
 #include "toolchain/parse/node_kind.h"
 #include "toolchain/parse/typed_nodes.h"
 
 namespace Carbon::Parse {
 
-auto HandleInvalid(Context& context) -> void {
-  CARBON_FATAL() << "The Invalid state shouldn't be on the stack: "
-                 << context.PopState();
-}
-
-auto Tree::Parse(Lex::TokenizedBuffer& tokens, DiagnosticConsumer& consumer,
-                 llvm::raw_ostream* vlog_stream) -> Tree {
-  Lex::TokenLocationTranslator translator(&tokens);
-  Lex::TokenDiagnosticEmitter emitter(translator, consumer);
-
-  // Delegate to the parser.
-  Tree tree(tokens);
-  Context context(tree, tokens, emitter, vlog_stream);
-  PrettyStackTraceFunction context_dumper(
-      [&](llvm::raw_ostream& output) { context.PrintForStackDump(output); });
-
-  context.AddLeafNode(NodeKind::FileStart,
-                      context.ConsumeChecked(Lex::TokenKind::FileStart));
-
-  context.PushState(State::DeclScopeLoop);
-
-  while (!context.state_stack().empty()) {
-    // clang warns on unhandled enum values; clang-tidy is incorrect here.
-    // NOLINTNEXTLINE(bugprone-switch-missing-default-case)
-    switch (context.state_stack().back().state) {
-#define CARBON_PARSE_STATE(Name) \
-  case State::Name:              \
-    Handle##Name(context);       \
-    break;
-#include "toolchain/parse/state.def"
-    }
-  }
-
-  context.AddLeafNode(NodeKind::FileEnd, *context.position());
-
-  if (auto verify = tree.Verify(); !verify.ok()) {
-    // TODO: This is temporarily printing to stderr directly during development.
-    // If we can, restrict this to a subtree with the error and add it to the
-    // stack trace (such as with PrettyStackTraceFunction). Otherwise, switch
-    // back to vlog_stream prior to broader distribution so that end users are
-    // hopefully comfortable copy-pasting stderr when there are bugs in tree
-    // construction.
-    tree.Print(llvm::errs());
-    CARBON_FATAL() << "Invalid tree returned by Parse(): " << verify.error();
-  }
-  return tree;
-}
-
 auto Tree::postorder() const -> llvm::iterator_range<PostorderIterator> {
   return {PostorderIterator(NodeId(0)),
           PostorderIterator(NodeId(node_impls_.size()))};

+ 6 - 12
toolchain/parse/tree.h

@@ -69,11 +69,12 @@ class Tree : public Printable<Tree> {
     ApiOrImpl api_or_impl;
   };
 
-  // Parses the token buffer into a `Tree`.
-  //
-  // This is the factory function which is used to build parse trees.
-  static auto Parse(Lex::TokenizedBuffer& tokens, DiagnosticConsumer& consumer,
-                    llvm::raw_ostream* vlog_stream) -> Tree;
+  // Wires up the reference to the tokenized buffer. The `Parse` function should
+  // be used to actually parse the tokens into a tree.
+  explicit Tree(Lex::TokenizedBuffer& tokens_arg) : tokens_(&tokens_arg) {
+    // If the tree is valid, there will be one node per token, so reserve once.
+    node_impls_.reserve(tokens_->expected_parse_tree_size());
+  }
 
   // Tests whether there are any errors in the parse tree.
   auto has_errors() const -> bool { return has_errors_; }
@@ -279,13 +280,6 @@ class Tree : public Printable<Tree> {
   static_assert(sizeof(NodeImpl) == 12,
                 "Unexpected size of node implementation!");
 
-  // Wires up the reference to the tokenized buffer. The `Parse` function should
-  // be used to actually parse the tokens into a tree.
-  explicit Tree(Lex::TokenizedBuffer& tokens_arg) : tokens_(&tokens_arg) {
-    // If the tree is valid, there will be one node per token, so reserve once.
-    node_impls_.reserve(tokens_->expected_parse_tree_size());
-  }
-
   // Prints a single node for Print(). Returns true when preorder and there are
   // children.
   auto PrintNode(llvm::raw_ostream& output, NodeId n, int depth,

+ 5 - 4
toolchain/parse/tree_test.cpp

@@ -15,6 +15,7 @@
 #include "toolchain/diagnostics/mocks.h"
 #include "toolchain/lex/lex.h"
 #include "toolchain/lex/tokenized_buffer.h"
+#include "toolchain/parse/parse.h"
 #include "toolchain/testing/yaml_test_helpers.h"
 
 namespace Carbon::Parse {
@@ -51,13 +52,13 @@ class TreeTest : public ::testing::Test {
 
 TEST_F(TreeTest, IsValid) {
   Lex::TokenizedBuffer& tokens = GetTokenizedBuffer("");
-  Tree tree = Tree::Parse(tokens, consumer_, /*vlog_stream=*/nullptr);
+  Tree tree = Parse(tokens, consumer_, /*vlog_stream=*/nullptr);
   EXPECT_TRUE((*tree.postorder().begin()).is_valid());
 }
 
 TEST_F(TreeTest, PrintPostorderAsYAML) {
   Lex::TokenizedBuffer& tokens = GetTokenizedBuffer("fn F();");
-  Tree tree = Tree::Parse(tokens, consumer_, /*vlog_stream=*/nullptr);
+  Tree tree = Parse(tokens, consumer_, /*vlog_stream=*/nullptr);
   EXPECT_FALSE(tree.has_errors());
   TestRawOstream print_stream;
   tree.Print(print_stream);
@@ -85,7 +86,7 @@ TEST_F(TreeTest, PrintPostorderAsYAML) {
 
 TEST_F(TreeTest, PrintPreorderAsYAML) {
   Lex::TokenizedBuffer& tokens = GetTokenizedBuffer("fn F();");
-  Tree tree = Tree::Parse(tokens, consumer_, /*vlog_stream=*/nullptr);
+  Tree tree = Parse(tokens, consumer_, /*vlog_stream=*/nullptr);
   EXPECT_FALSE(tree.has_errors());
   TestRawOstream print_stream;
   tree.Print(print_stream, /*preorder=*/true);
@@ -131,7 +132,7 @@ TEST_F(TreeTest, HighRecursion) {
   Lex::TokenizedBuffer& tokens = GetTokenizedBuffer(code);
   ASSERT_FALSE(tokens.has_errors());
   Testing::MockDiagnosticConsumer consumer;
-  Tree tree = Tree::Parse(tokens, consumer, /*vlog_stream=*/nullptr);
+  Tree tree = Parse(tokens, consumer, /*vlog_stream=*/nullptr);
   EXPECT_FALSE(tree.has_errors());
 }
 

+ 3 - 3
toolchain/parse/typed_nodes_test.cpp

@@ -12,7 +12,7 @@
 #include "toolchain/diagnostics/mocks.h"
 #include "toolchain/lex/lex.h"
 #include "toolchain/lex/tokenized_buffer.h"
-#include "toolchain/parse/tree.h"
+#include "toolchain/parse/parse.h"
 
 namespace Carbon::Parse {
 namespace {
@@ -40,8 +40,8 @@ class TypedNodeTest : public ::testing::Test {
   }
 
   auto GetTree(llvm::StringRef t) -> Tree& {
-    tree_storage_.push_front(Tree::Parse(GetTokenizedBuffer(t), consumer_,
-                                         /*vlog_stream=*/nullptr));
+    tree_storage_.push_front(Parse(GetTokenizedBuffer(t), consumer_,
+                                   /*vlog_stream=*/nullptr));
     return tree_storage_.front();
   }