Quellcode durchsuchen

[toolchain] Parsing support for struct literals, following #561 / #653. (#699)

This supports both struct type literals, `{.x: i32, .y: i32}`, and struct
value literals, `{.x = 3, .y = 4}`. The degenerate case of `{}` is
treated as a struct value literal, with the expectation that an empty
struct value has the same type/value duality as an empty tuple value.
Richard Smith vor 4 Jahren
Ursprung
Commit
fe28dd9a33

+ 1 - 0
toolchain/parser/BUILD

@@ -65,6 +65,7 @@ cc_test(
         ":parse_tree",
         "//toolchain/common:yaml_test_helpers",
         "//toolchain/diagnostics:diagnostic_emitter",
+        "//toolchain/diagnostics:mocks",
         "//toolchain/lexer:tokenized_buffer",
         "@llvm-project//llvm:Support",
         "@llvm-project//llvm:gmock",

+ 9 - 0
toolchain/parser/parse_node_kind.def

@@ -58,4 +58,13 @@ CARBON_PARSE_NODE_KIND(PrefixOperator)
 CARBON_PARSE_NODE_KIND(InfixOperator)
 CARBON_PARSE_NODE_KIND(PostfixOperator)
 
+// Struct literals.
+CARBON_PARSE_NODE_KIND(StructLiteral)  // `{...}` value literal; also used for the empty `{}`.
+CARBON_PARSE_NODE_KIND(StructTypeLiteral)  // `{.x: T, ...}` type literal.
+CARBON_PARSE_NODE_KIND(StructFieldDesignator)  // The `.name` that starts a field.
+CARBON_PARSE_NODE_KIND(StructFieldValue)  // A `.name = value` field.
+CARBON_PARSE_NODE_KIND(StructFieldType)  // A `.name: type` field.
+CARBON_PARSE_NODE_KIND(StructComma)  // A `,` between (or trailing after) fields.
+CARBON_PARSE_NODE_KIND(StructEnd)  // The closing `}` of either literal form.
+
 #undef CARBON_PARSE_NODE_KIND

+ 110 - 2
toolchain/parser/parse_tree_test.cpp

@@ -12,6 +12,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "toolchain/common/yaml_test_helpers.h"
 #include "toolchain/diagnostics/diagnostic_emitter.h"
+#include "toolchain/diagnostics/mocks.h"
 #include "toolchain/lexer/tokenized_buffer.h"
 #include "toolchain/parser/parse_node_kind.h"
 #include "toolchain/parser/parse_test_helpers.h"
@@ -19,11 +20,13 @@
 namespace Carbon {
 namespace {
 
+using Carbon::Testing::DiagnosticMessage;
 using Carbon::Testing::ExpectedNode;
 using Carbon::Testing::MatchParseTreeNodes;
 using namespace Carbon::Testing::NodeMatchers;
 using ::testing::ElementsAre;
 using ::testing::Eq;
+using ::testing::HasSubstr;
 using ::testing::Ne;
 using ::testing::NotNull;
 using ::testing::StrEq;
@@ -405,10 +408,11 @@ TEST_F(ParseTreeTest, InvalidDesignators) {
                              MatchExpressionStatement(
                                  MatchDesignatorExpression(
                                      MatchNameReference("a"), ".", HasError),
-                                 ";"),
+                                 HasError, ";"),
                              MatchExpressionStatement(
                                  MatchDesignatorExpression(
-                                     MatchNameReference("a"), ".", HasError),
+                                     MatchNameReference("a"), ".",
+                                     MatchDesignatedName("fn", HasError)),
                                  ";"),
                              MatchExpressionStatement(
                                  MatchDesignatorExpression(
@@ -970,6 +974,110 @@ TEST_F(ParseTreeTest, Tuples) {
            MatchFileEnd()}));
 }
 
+TEST_F(ParseTreeTest, Structs) {  // Valid struct type literals, value literals, `{}`, and trailing commas.
+  TokenizedBuffer tokens = GetTokenizedBuffer(R"(
+    var x: {.a: i32, .b: i32} = {.a = 1, .b = 2};
+    var y: {} = {};
+    var z: {.n: i32,} = {.n = 4,};
+  )");
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
+  EXPECT_FALSE(tree.HasErrors());  // All three declarations are expected to parse without errors.
+
+  EXPECT_THAT(
+      tree,
+      MatchParseTreeNodes(
+          {MatchVariableDeclaration(  // `x`: two-field type literal initialized by a two-field value literal.
+               MatchPatternBinding(
+                   MatchDeclaredName("x"), ":",
+                   MatchStructTypeLiteral(
+                       MatchStructFieldType(MatchStructFieldDesignator(
+                                                ".", MatchDesignatedName("a")),
+                                            ":", MatchLiteral("i32")),
+                       MatchStructComma(),
+                       MatchStructFieldType(MatchStructFieldDesignator(
+                                                ".", MatchDesignatedName("b")),
+                                            ":", MatchLiteral("i32")),
+                       MatchStructEnd())),
+               MatchVariableInitializer(MatchStructLiteral(
+                   MatchStructFieldValue(MatchStructFieldDesignator(
+                                             ".", MatchDesignatedName("a")),
+                                         "=", MatchLiteral("1")),
+                   MatchStructComma(),
+                   MatchStructFieldValue(MatchStructFieldDesignator(
+                                             ".", MatchDesignatedName("b")),
+                                         "=", MatchLiteral("2")),
+                   MatchStructEnd())),
+               MatchDeclarationEnd()),
+           MatchVariableDeclaration(  // `y`: the empty literal on both sides.
+               MatchPatternBinding(MatchDeclaredName("y"), ":",
+                                   MatchStructLiteral(MatchStructEnd())),  // `{}` in type position is still a StructLiteral node.
+               MatchVariableInitializer(MatchStructLiteral(MatchStructEnd())),
+               MatchDeclarationEnd()),
+           MatchVariableDeclaration(  // `z`: single field followed by a trailing comma in each literal.
+               MatchPatternBinding(
+                   MatchDeclaredName("z"), ":",
+                   MatchStructTypeLiteral(
+                       MatchStructFieldType(MatchStructFieldDesignator(
+                                                ".", MatchDesignatedName("n")),
+                                            ":", MatchLiteral("i32")),
+                       MatchStructComma(), MatchStructEnd())),  // The trailing comma still produces a StructComma node.
+               MatchVariableInitializer(MatchStructLiteral(
+                   MatchStructFieldValue(MatchStructFieldDesignator(
+                                             ".", MatchDesignatedName("n")),
+                                         "=", MatchLiteral("4")),
+                   MatchStructComma(), MatchStructEnd())),
+               MatchDeclarationEnd()),
+           MatchFileEnd()}));
+}
+
+TEST_F(ParseTreeTest, StructErrors) {  // Table-driven: each malformed input is paired with the diagnostic it must produce.
+  struct Testcase {
+    llvm::StringLiteral input;  // Source text handed to the lexer and parser.
+    ::testing::Matcher<const Diagnostic&> diag_matcher;  // Expectation on the diagnostic reported for `input`.
+  };
+  Testcase testcases[] = {
+      {"var x: {i32} = {};",
+       DiagnosticMessage("Expected `.field: type` or `.field = value`.")},
+      {"var x: {a} = {};",
+       DiagnosticMessage("Expected `.field: type` or `.field = value`.")},
+      {"var x: {a:} = {};",
+       DiagnosticMessage("Expected `.field: type` or `.field = value`.")},
+      {"var x: {a=} = {};",
+       DiagnosticMessage("Expected `.field: type` or `.field = value`.")},
+      {"var x: {.} = {};", DiagnosticMessage("Expected identifier after `.`.")},
+      {"var x: {.\"hello\" = 0, .y = 4} = {};",
+       DiagnosticMessage("Expected identifier after `.`.")},
+      {"var x: {.\"hello\": i32, .y: i32} = {};",
+       DiagnosticMessage("Expected identifier after `.`.")},
+      {"var x: {.a} = {};",
+       DiagnosticMessage("Expected `.field: type` or `.field = value`.")},
+      {"var x: {.a:} = {};", DiagnosticMessage("Expected expression.")},
+      {"var x: {.a=} = {};", DiagnosticMessage("Expected expression.")},
+      {"var x: {.a: i32, .b = 0} = {};",
+       DiagnosticMessage("Expected `.field: type`.")},  // After a `:` field, only the type form is offered.
+      {"var x: {.a = 0, b: i32} = {};",
+       DiagnosticMessage("Expected `.field = value`.")},  // After a `=` field, only the value form is offered.
+      {"var x: {,} = {};",
+       DiagnosticMessage("Expected `.field: type` or `.field = value`.")},
+      {"var x: {.a: i32,,} = {};",
+       DiagnosticMessage("Expected `.field: type`.")},
+      {"var x: {.a = 0,,} = {};",
+       DiagnosticMessage("Expected `.field = value`.")},
+      {"var x: {.a: i32 banana} = {.a = 0};",
+       DiagnosticMessage("Expected `,` or `}`.")},
+      {"var x: {.a: i32} = {.a = 0 banana};",
+       DiagnosticMessage("Expected `,` or `}`.")},
+  };
+
+  for (Testcase testcase : testcases) {  // NOTE(review): iterates by value, copying each Testcase; `const Testcase&` would avoid that.
+    TokenizedBuffer tokens = GetTokenizedBuffer(testcase.input);
+    Testing::MockDiagnosticConsumer consumer;  // Fresh mock per case; presumably shadows the fixture's `consumer` used by other tests.
+    EXPECT_CALL(consumer, HandleDiagnostic(testcase.diag_matcher));  // No cardinality given, so gMock expects exactly one matching call.
+    ParseTree tree = ParseTree::Parse(tokens, consumer);
+    EXPECT_TRUE(tree.HasErrors());  // Every input in the table must mark the tree as erroneous.
+  }
+}
+
 auto GetAndDropLine(llvm::StringRef& s) -> std::string {
   auto newline_offset = s.find_first_of('\n');
   llvm::StringRef line = s.slice(0, newline_offset);

+ 132 - 19
toolchain/parser/parser_impl.cpp

@@ -54,6 +54,29 @@ struct ExpectedParameterName : SimpleDiagnostic<ExpectedParameterName> {
       "Expected parameter declaration.";
 };
 
+struct ExpectedStructLiteralField  // Diagnostic for a malformed field inside a `{...}` literal.
+    : SimpleDiagnostic<ExpectedStructLiteralField> {
+  static constexpr llvm::StringLiteral ShortName = "syntax-error";
+
+  bool can_be_type;  // True when a `.field: type` field would be accepted at this position.
+  bool can_be_value;  // True when a `.field = value` field would be accepted at this position.
+
+  auto Format() -> std::string {  // Builds the message from whichever field forms are still allowed.
+    std::string result = "Expected ";
+    if (can_be_type) {
+      result += "`.field: type`";
+    }
+    if (can_be_type && can_be_value) {  // Join the two alternatives only when both are listed.
+      result += " or ";
+    }
+    if (can_be_value) {
+      result += "`.field = value`";
+    }
+    result += ".";  // With both flags false this yields "Expected ."; callers here always set at least one.
+    return result;
+  }
+};
+
 struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
   static constexpr llvm::StringLiteral ShortName = "syntax-error";
   static constexpr llvm::StringLiteral Message =
@@ -118,7 +141,13 @@ struct ExpectedIdentifierAfterDot
 struct UnexpectedTokenAfterListElement
     : SimpleDiagnostic<UnexpectedTokenAfterListElement> {
   static constexpr llvm::StringLiteral ShortName = "syntax-error";
-  static constexpr llvm::StringLiteral Message = "Expected `,` or `)`.";
+  static constexpr const char* Message = "Expected `,` or `{0}`.";  // `{0}` is replaced by the closing delimiter's spelling.
+
+  TokenKind close;  // Token kind of the closing delimiter the list parser was waiting for.
+
+  auto Format() -> std::string {  // Substitutes the fixed spelling of `close` into Message.
+    return llvm::formatv(Message, close.GetFixedSpelling()).str();
+  }
 };
 
 struct BinaryOperatorRequiresWhitespace
@@ -387,57 +416,57 @@ auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
 }
 
 template <typename ListElementParser, typename ListCompletionHandler>
-auto ParseTree::Parser::ParseParenList(ListElementParser list_element_parser,
-                                       ParseNodeKind comma_kind,
-                                       ListCompletionHandler list_handler,
-                                       bool allow_trailing_comma)
+auto ParseTree::Parser::ParseList(TokenKind open, TokenKind close,  // Generalized from ParseParenList: the delimiters are now parameters.
+                                  ListElementParser list_element_parser,
+                                  ParseNodeKind comma_kind,
+                                  ListCompletionHandler list_handler,
+                                  bool allow_trailing_comma)
     -> llvm::Optional<Node> {
   // `(` element-list[opt] `)`
   //
   // element-list ::= element
   //              ::= element `,` element-list
-  TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
+  TokenizedBuffer::Token open_paren = Consume(open);  // Holds whichever `open` token was requested; the name predates non-paren lists.
 
   bool has_errors = false;
   bool any_commas = false;
   int64_t num_elements = 0;
 
   // Parse elements, if any are specified.
-  if (!NextTokenIs(TokenKind::CloseParen())) {
+  if (!NextTokenIs(close)) {  // An immediately closed list has no elements to parse.
     while (true) {
       bool element_error = !list_element_parser();
       has_errors |= element_error;
       ++num_elements;
 
-      if (!NextTokenIsOneOf({TokenKind::CloseParen(), TokenKind::Comma()})) {
+      if (!NextTokenIsOneOf({close, TokenKind::Comma()})) {  // Anything other than `,` or the closer after an element is an error.
         if (!element_error) {
-          emitter.EmitError<UnexpectedTokenAfterListElement>(*position);
+          emitter.EmitError<UnexpectedTokenAfterListElement>(*position,
+                                                             {.close = close});  // Lets the message name the expected closing delimiter.
         }
         has_errors = true;
 
-        auto end_of_element =
-            FindNextOf({TokenKind::Comma(), TokenKind::CloseParen()});
+        auto end_of_element = FindNextOf({TokenKind::Comma(), close});  // NOTE(review): the comment and assert text below still name `(`/`)` even when `close` is another delimiter.
         // The lexer guarantees that parentheses are balanced.
         assert(end_of_element && "missing matching `)` for `(`");
         SkipTo(*end_of_element);
       }
 
-      if (NextTokenIs(TokenKind::CloseParen())) {
+      if (NextTokenIs(close)) {  // End of list reached without a trailing comma.
         break;
       }
 
       AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
       any_commas = true;
 
-      if (allow_trailing_comma && NextTokenIs(TokenKind::CloseParen())) {
+      if (allow_trailing_comma && NextTokenIs(close)) {  // Permits `elem,` directly before the closing delimiter.
         break;
       }
     }
   }
 
   bool is_single_item = num_elements == 1 && !any_commas;
-  return list_handler(open_paren, is_single_item,
-                      Consume(TokenKind::CloseParen()), has_errors);
+  return list_handler(open_paren, is_single_item, Consume(close), has_errors);  // The handler's result is the result of the whole list parse.
 }
 
 auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
@@ -683,6 +712,80 @@ auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
       /*allow_trailing_comma=*/true);
 }
 
+auto ParseTree::Parser::ParseBraceExpression() -> llvm::Optional<Node> {  // Parses a struct value literal, struct type literal, or `{}`.
+  // braced-expression ::= `{` [field-value-list] `}`
+  //                   ::= `{` field-type-list `}`
+  // field-value-list ::= field-value [`,`]
+  //                  ::= field-value `,` field-value-list
+  // field-value ::= `.` identifier `=` expression
+  // field-type-list ::= field-type [`,`]
+  //                 ::= field-type `,` field-type-list
+  // field-type ::= `.` identifier `:` type
+  //
+  // Note that `{` `}` is the first form (an empty struct), but that an empty
+  // struct value also behaves as an empty struct type.
+  auto start = GetSubtreeStartPosition();
+  enum Kind { Unknown, Value, Type };  // Unknown until the first well-formed field fixes the literal's kind.
+  Kind kind = Unknown;  // Shared by both lambdas below through their `[&]` captures.
+  return ParseList(
+      TokenKind::OpenCurlyBrace(), TokenKind::CloseCurlyBrace(),
+      [&]() -> llvm::Optional<Node> {  // Element parser: handles one `.name = value` or `.name: type` field.
+        auto start_elem = GetSubtreeStartPosition();
+
+        auto diagnose_invalid_syntax = [&] {  // Emits the field diagnostic and yields llvm::None for the element.
+          emitter.EmitError<ExpectedStructLiteralField>(
+              *position,
+              {.can_be_type = kind != Value, .can_be_value = kind != Type});  // Only offer the forms consistent with earlier fields.
+          return llvm::None;
+        };
+
+        if (!NextTokenIs(TokenKind::Period())) {  // Every field must begin with `.`.
+          return diagnose_invalid_syntax();
+        }
+        auto designator = ParseDesignatorExpression(
+            start_elem, ParseNodeKind::StructFieldDesignator(),
+            /*has_errors=*/false);
+        if (!designator) {  // Recover by skipping to this field's `=`/`:` if FindNextOf reports one before any `,`.
+          auto recovery_pos = FindNextOf(
+              {TokenKind::Equal(), TokenKind::Colon(), TokenKind::Comma()});
+          if (!recovery_pos ||
+              tokens.GetKind(*recovery_pos) == TokenKind::Comma()) {
+            return llvm::None;  // No separator to resume at; the element is reported to ParseList as failed.
+          }
+          SkipTo(*recovery_pos);
+        }
+
+        // Work out the kind of this element from the separator that follows.
+        Kind elem_kind =
+            (NextTokenIs(TokenKind::Equal())
+                 ? Value
+                 : NextTokenIs(TokenKind::Colon()) ? Type : Unknown);
+        if (elem_kind == Unknown || (kind != Unknown && elem_kind != kind)) {  // Reject a missing separator, or mixing `:` and `=` fields.
+          return diagnose_invalid_syntax();
+        }
+        kind = elem_kind;
+
+        // Struct type fields and value fields use the same grammar except that
+        // one has a `:` separator and the other has an `=` separator.
+        auto equal_or_colon_token =
+            Consume(kind == Type ? TokenKind::Colon() : TokenKind::Equal());
+        auto type_or_value = ParseExpression();
+        return AddNode(kind == Type ? ParseNodeKind::StructFieldType()
+                                    : ParseNodeKind::StructFieldValue(),
+                       equal_or_colon_token, start_elem,
+                       /*has_error=*/!designator || !type_or_value);  // The field node is still added, flagged if either half failed.
+      },
+      ParseNodeKind::StructComma(),
+      [&](TokenizedBuffer::Token open_brace, bool is_single_item,
+          TokenizedBuffer::Token close_brace, bool has_errors) {  // Completion handler; `is_single_item` is not used here.
+        AddLeafNode(ParseNodeKind::StructEnd(), close_brace);
+        return AddNode(kind == Type ? ParseNodeKind::StructTypeLiteral()  // A still-Unknown kind (e.g. `{}`) becomes StructLiteral.
+                                    : ParseNodeKind::StructLiteral(),
+                       open_brace, start, has_errors);
+      },
+      /*allow_trailing_comma=*/true);
+}
+
 auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
   llvm::Optional<ParseNodeKind> kind;
   switch (NextTokenKind()) {
@@ -702,6 +805,9 @@ auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
     case TokenKind::OpenParen():
       return ParseParenExpression();
 
+    case TokenKind::OpenCurlyBrace():
+      return ParseBraceExpression();
+
     default:
       emitter.EmitError<ExpectedExpression>(*position);
       return llvm::None;
@@ -711,6 +817,7 @@ auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
 }
 
 auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
+                                                  ParseNodeKind kind,
                                                   bool has_errors)
     -> llvm::Optional<Node> {
   // `.` identifier
@@ -723,11 +830,16 @@ auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
     // If we see a keyword, assume it was intended to be the designated name.
     // TODO: Should keywords be valid in designators?
     if (NextTokenKind().IsKeyword()) {
-      Consume(NextTokenKind());
+      name = Consume(NextTokenKind());
+      auto name_node = AddLeafNode(ParseNodeKind::DesignatedName(), *name);
+      MarkNodeError(name_node);
+    } else {
+      has_errors = true;
     }
-    has_errors = true;
   }
-  return AddNode(ParseNodeKind::DesignatorExpression(), dot, start, has_errors);
+
+  Node result = AddNode(kind, dot, start, has_errors);
+  return name ? result : llvm::Optional<Node>();
 }
 
 auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
@@ -753,7 +865,8 @@ auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
   while (true) {
     switch (NextTokenKind()) {
       case TokenKind::Period():
-        expression = ParseDesignatorExpression(start, !expression);
+        expression = ParseDesignatorExpression(
+            start, ParseNodeKind::DesignatorExpression(), !expression);
         break;
 
       case TokenKind::OpenParen():

+ 17 - 3
toolchain/parser/parser_impl.h

@@ -134,13 +134,24 @@ class ParseTree::Parser {
   auto ParseCloseParen(TokenizedBuffer::Token open_paren, ParseNodeKind kind)
       -> llvm::Optional<Node>;
 
+  // Parses a comma-separated list with the given delimiters.
+  template <typename ListElementParser, typename ListCompletionHandler>
+  auto ParseList(TokenKind open, TokenKind close,  // `open`/`close` are the token kinds of the list's delimiters.
+                 ListElementParser list_element_parser,  // Invoked once per element; a falsy result marks that element as an error.
+                 ParseNodeKind comma_kind, ListCompletionHandler list_handler,  // `comma_kind` labels comma nodes; `list_handler` produces the returned node.
+                 bool allow_trailing_comma = false) -> llvm::Optional<Node>;  // If true, a comma may directly precede `close`.
+
   // Parses a parenthesized, comma-separated list.
   template <typename ListElementParser, typename ListCompletionHandler>
   auto ParseParenList(ListElementParser list_element_parser,
                       ParseNodeKind comma_kind,
                       ListCompletionHandler list_handler,
                       bool allow_trailing_comma = false)
-      -> llvm::Optional<Node>;
+      -> llvm::Optional<Node> {  // Now defined inline as a thin wrapper over ParseList.
+    return ParseList(TokenKind::OpenParen(), TokenKind::CloseParen(),  // Fixes the delimiters to `(` and `)`.
+                     list_element_parser, comma_kind, list_handler,
+                     allow_trailing_comma);
+  }
 
   // Parses a single function parameter declaration.
   auto ParseFunctionParameter() -> llvm::Optional<Node>;
@@ -174,14 +185,17 @@ class ParseTree::Parser {
   // Parses a parenthesized expression.
   auto ParseParenExpression() -> llvm::Optional<Node>;
 
+  // Parses a braced expression.
+  auto ParseBraceExpression() -> llvm::Optional<Node>;  // Handles `{.x = v, ...}`, `{.x: T, ...}`, and the empty `{}`.
+
   // Parses a primary expression, which is either a terminal portion of an
   // expression tree, such as an identifier or literal, or a parenthesized
   // expression.
   auto ParsePrimaryExpression() -> llvm::Optional<Node>;
 
   // Parses a designator expression suffix starting with `.`.
-  auto ParseDesignatorExpression(SubtreeStart start, bool has_errors)
-      -> llvm::Optional<Node>;
+  auto ParseDesignatorExpression(SubtreeStart start, ParseNodeKind kind,
+                                 bool has_errors) -> llvm::Optional<Node>;  // `kind` is the node kind given to the designator node that is added.
 
   // Parses a call expression suffix starting with `(`.
   auto ParseCallExpression(SubtreeStart start, bool has_errors)