Просмотр исходного кода

[toolchain] Parse postfix operator `*` as a type operator. (#576)

Richard Smith 4 лет назад
Родитель
Сommit
1b122924e8

+ 1 - 1
toolchain/diagnostics/diagnostic_emitter.h

@@ -153,7 +153,7 @@ struct SimpleDiagnostic {
 // produced.
 class ErrorTrackingDiagnosticConsumer : public DiagnosticConsumer {
  public:
-  ErrorTrackingDiagnosticConsumer(DiagnosticConsumer& next_consumer)
+  explicit ErrorTrackingDiagnosticConsumer(DiagnosticConsumer& next_consumer)
       : next_consumer(&next_consumer) {}
 
   auto HandleDiagnostic(const Diagnostic& diagnostic) -> void override {

+ 4 - 0
toolchain/driver/driver_test.cpp

@@ -164,6 +164,8 @@ TEST(DriverTest, DumpTokens) {
   EXPECT_THAT(&*token_it, IsKeyValueScalars("spelling", "Hello"));
   ++token_it;
   EXPECT_THAT(&*token_it, IsKeyValueScalars("identifier", "0"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("has_trailing_space", "true"));
   EXPECT_THAT(++token_it, Eq(token_value_node->end()));
 
   ++mapping_it;
@@ -189,6 +191,8 @@ TEST(DriverTest, DumpTokens) {
   EXPECT_THAT(&*token_it, IsKeyValueScalars("spelling", "World"));
   ++token_it;
   EXPECT_THAT(&*token_it, IsKeyValueScalars("identifier", "1"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("has_trailing_space", "true"));
   EXPECT_THAT(++token_it, Eq(token_value_node->end()));
 
   ++mapping_it;

+ 2 - 0
toolchain/lexer/token_registry.def

@@ -92,6 +92,7 @@ CARBON_SYMBOL_TOKEN(Tilde,               "~")
 // clang-format on
 CARBON_OPENING_GROUP_SYMBOL_TOKEN(OpenParen, "(", CloseParen)
 CARBON_OPENING_GROUP_SYMBOL_TOKEN(OpenCurlyBrace, "{", CloseCurlyBrace)
+CARBON_OPENING_GROUP_SYMBOL_TOKEN(OpenSquareBracket, "[", CloseSquareBracket)
 // clang-format off
 #undef CARBON_OPENING_GROUP_SYMBOL_TOKEN
 
@@ -102,6 +103,7 @@ CARBON_OPENING_GROUP_SYMBOL_TOKEN(OpenCurlyBrace, "{", CloseCurlyBrace)
 // clang-format on
 CARBON_CLOSING_GROUP_SYMBOL_TOKEN(CloseParen, ")", OpenParen)
 CARBON_CLOSING_GROUP_SYMBOL_TOKEN(CloseCurlyBrace, "}", OpenCurlyBrace)
+CARBON_CLOSING_GROUP_SYMBOL_TOKEN(CloseSquareBracket, "]", OpenSquareBracket)
 // clang-format off
 #undef CARBON_CLOSING_GROUP_SYMBOL_TOKEN
 

+ 35 - 5
toolchain/lexer/tokenized_buffer.cpp

@@ -146,7 +146,15 @@ class TokenizedBuffer::Lexer {
     set_indent = false;
   }
 
+  auto NoteWhitespace() -> void {
+    if (!buffer.token_infos.empty()) {
+      buffer.token_infos.back().has_trailing_space = true;
+    }
+  }
+
   auto SkipWhitespace(llvm::StringRef& source_text) -> bool {
+    const char* const whitespace_start = source_text.begin();
+
     while (!source_text.empty()) {
       // We only support line-oriented commenting and lex comments as-if they
       // were whitespace.
@@ -174,6 +182,9 @@ class TokenizedBuffer::Lexer {
           // If we find a non-whitespace character without exhausting the
           // buffer, return true to continue lexing.
           assert(!IsSpace(source_text.front()));
+          if (whitespace_start != source_text.begin()) {
+            NoteWhitespace();
+          }
           return true;
 
         case '\n':
@@ -374,13 +385,17 @@ class TokenizedBuffer::Lexer {
       open_groups.pop_back();
       token_emitter.EmitError<MismatchedClosing>(opening_token);
 
+      assert(!buffer.Tokens().empty() && "Must have a prior opening token!");
+      Token prev_token = buffer.Tokens().end()[-1];
+
       // TODO: do a smarter backwards scan for where to put the closing
       // token.
-      Token closing_token =
-          buffer.AddToken({.kind = opening_kind.GetClosingSymbol(),
-                           .is_recovery = true,
-                           .token_line = current_line,
-                           .column = current_column});
+      Token closing_token = buffer.AddToken(
+          {.kind = opening_kind.GetClosingSymbol(),
+           .has_trailing_space = buffer.HasTrailingWhitespace(prev_token),
+           .is_recovery = true,
+           .token_line = current_line,
+           .column = current_column});
       TokenInfo& opening_token_info = buffer.GetTokenInfo(opening_token);
       TokenInfo& closing_token_info = buffer.GetTokenInfo(closing_token);
       opening_token_info.closing_token = closing_token;
@@ -502,6 +517,9 @@ auto TokenizedBuffer::Lex(SourceBuffer& source, DiagnosticConsumer& consumer)
     assert(result && "No token was lexed.");
   }
 
+  // The end-of-file token is always considered to be whitespace.
+  lexer.NoteWhitespace();
+
   lexer.CloseInvalidOpenGroups(TokenKind::Error());
   lexer.AddEndOfFileToken();
 
@@ -627,6 +645,15 @@ auto TokenizedBuffer::GetMatchedOpeningToken(Token closing_token) const
   return closing_token_info.opening_token;
 }
 
+auto TokenizedBuffer::HasLeadingWhitespace(Token token) const -> bool {
+  auto it = TokenIterator(token);
+  return it == Tokens().begin() || GetTokenInfo(*(it - 1)).has_trailing_space;
+}
+
+auto TokenizedBuffer::HasTrailingWhitespace(Token token) const -> bool {
+  return GetTokenInfo(token).has_trailing_space;
+}
+
 auto TokenizedBuffer::IsRecoveryToken(Token token) const -> bool {
   return GetTokenInfo(token).is_recovery;
 }
@@ -733,6 +760,9 @@ auto TokenizedBuffer::PrintToken(llvm::raw_ostream& output_stream, Token token,
   }
   // TODO: Include value for numeric literals.
 
+  if (token_info.has_trailing_space) {
+    output_stream << ", has_trailing_space: true";
+  }
   if (token_info.is_recovery) {
     output_stream << ", recovery: true";
   }

+ 10 - 2
toolchain/lexer/tokenized_buffer.h

@@ -166,7 +166,7 @@ class TokenizedBuffer {
   // Random-access iterator over tokens within the buffer.
   class TokenIterator
       : public llvm::iterator_facade_base<
-            TokenIterator, std::random_access_iterator_tag, Token, int> {
+            TokenIterator, std::random_access_iterator_tag, const Token, int> {
    public:
     TokenIterator() = default;
 
@@ -180,8 +180,8 @@ class TokenizedBuffer {
     }
 
     auto operator*() const -> const Token& { return token; }
-    auto operator*() -> Token& { return token; }
 
+    using iterator_facade_base::operator-;
     auto operator-(const TokenIterator& rhs) const -> int {
       return token.index - rhs.token.index;
     }
@@ -303,6 +303,11 @@ class TokenizedBuffer {
   // The given token must be a closing token kind.
   [[nodiscard]] auto GetMatchedOpeningToken(Token closing_token) const -> Token;
 
+  // Returns whether the given token has leading whitespace.
+  [[nodiscard]] auto HasLeadingWhitespace(Token token) const -> bool;
+  // Returns whether the given token has trailing whitespace.
+  [[nodiscard]] auto HasTrailingWhitespace(Token token) const -> bool;
+
   // Returns whether the token was created as part of an error recovery effort.
   //
   // For example, a closing paren inserted to match an unmatched paren.
@@ -380,6 +385,9 @@ class TokenizedBuffer {
   struct TokenInfo {
     TokenKind kind;
 
+    // Whether the token has trailing whitespace.
+    bool has_trailing_space = false;
+
     // Whether the token was injected artificially during error recovery.
     bool is_recovery = false;
 

+ 45 - 4
toolchain/lexer/tokenized_buffer_test.cpp

@@ -543,6 +543,40 @@ TEST_F(LexerTest, MismatchedGroups) {
       }));
 }
 
+TEST_F(LexerTest, Whitespace) {
+  auto buffer = Lex("{( } {(");
+
+  // Whether there should be whitespace before/after each token.
+  bool space[] = {true,
+                  // {
+                  false,
+                  // (
+                  true,
+                  // inserted )
+                  true,
+                  // }
+                  true,
+                  // {
+                  false,
+                  // (
+                  true,
+                  // inserted )
+                  true,
+                  // inserted }
+                  true,
+                  // EOF
+                  false};
+  int pos = 0;
+  for (TokenizedBuffer::Token token : buffer.Tokens()) {
+    ASSERT_LT(pos, std::size(space));
+    EXPECT_THAT(buffer.HasLeadingWhitespace(token), Eq(space[pos]));
+    ++pos;
+    ASSERT_LT(pos, std::size(space));
+    EXPECT_THAT(buffer.HasTrailingWhitespace(token), Eq(space[pos]));
+  }
+  ASSERT_EQ(pos + 1, std::size(space));
+}
+
 TEST_F(LexerTest, Keywords) {
   auto buffer = Lex("   fn");
   EXPECT_FALSE(buffer.HasErrors());
@@ -861,7 +895,7 @@ TEST_F(LexerTest, Printing) {
   llvm::StringRef print = print_stream.str();
   EXPECT_THAT(GetAndDropLine(print),
               StrEq("token: { index: 0, kind:      'Semi', line: 1, column: 1, "
-                    "indent: 1, spelling: ';' }"));
+                    "indent: 1, spelling: ';', has_trailing_space: true }"));
   EXPECT_THAT(GetAndDropLine(print),
               StrEq("token: { index: 1, kind: 'EndOfFile', line: 1, column: 2, "
                     "indent: 1, spelling: '' }"));
@@ -887,7 +921,8 @@ TEST_F(LexerTest, Printing) {
                     "6, indent: 1, spelling: ';' }"));
   EXPECT_THAT(GetAndDropLine(print),
               StrEq("token: { index: 4, kind: 'CloseParen', line: 1, column: "
-                    "7, indent: 1, spelling: ')', opening_token: 0 }"));
+                    "7, indent: 1, spelling: ')', opening_token: 0, "
+                    "has_trailing_space: true }"));
   EXPECT_THAT(GetAndDropLine(print),
               StrEq("token: { index: 5, kind:  'EndOfFile', line: 1, column: "
                     "8, indent: 1, spelling: '' }"));
@@ -902,7 +937,7 @@ TEST_F(LexerTest, Printing) {
   EXPECT_THAT(
       GetAndDropLine(print),
       StrEq("token: { index: 0, kind:      'Semi', line:  1, column:  1, "
-            "indent: 1, spelling: ';' }"));
+            "indent: 1, spelling: ';', has_trailing_space: true }"));
   EXPECT_THAT(
       GetAndDropLine(print),
       StrEq("token: { index: 1, kind:      'Semi', line: 11, column:  9, "
@@ -910,7 +945,7 @@ TEST_F(LexerTest, Printing) {
   EXPECT_THAT(
       GetAndDropLine(print),
       StrEq("token: { index: 2, kind:      'Semi', line: 11, column: 10, "
-            "indent: 9, spelling: ';' }"));
+            "indent: 9, spelling: ';', has_trailing_space: true }"));
   EXPECT_THAT(
       GetAndDropLine(print),
       StrEq("token: { index: 3, kind: 'EndOfFile', line: 11, column: 11, "
@@ -958,6 +993,8 @@ TEST_F(LexerTest, PrintingAsYaml) {
   EXPECT_THAT(&*token_it, IsKeyValueScalars("indent", "2"));
   ++token_it;
   EXPECT_THAT(&*token_it, IsKeyValueScalars("spelling", ";"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("has_trailing_space", "true"));
   EXPECT_THAT(++token_it, Eq(token_value_node->end()));
 
   ++mapping_it;
@@ -981,6 +1018,8 @@ TEST_F(LexerTest, PrintingAsYaml) {
   EXPECT_THAT(&*token_it, IsKeyValueScalars("indent", "1"));
   ++token_it;
   EXPECT_THAT(&*token_it, IsKeyValueScalars("spelling", ";"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("has_trailing_space", "true"));
   EXPECT_THAT(++token_it, Eq(token_value_node->end()));
 
   ++mapping_it;
@@ -1004,6 +1043,8 @@ TEST_F(LexerTest, PrintingAsYaml) {
   EXPECT_THAT(&*token_it, IsKeyValueScalars("indent", "1"));
   ++token_it;
   EXPECT_THAT(&*token_it, IsKeyValueScalars("spelling", ";"));
+  ++token_it;
+  EXPECT_THAT(&*token_it, IsKeyValueScalars("has_trailing_space", "true"));
   EXPECT_THAT(++token_it, Eq(token_value_node->end()));
 
   ++mapping_it;

+ 107 - 0
toolchain/parser/parse_tree_test.cpp

@@ -530,6 +530,113 @@ TEST_F(ParseTreeTest, Operators) {
            MatchFileEnd()}));
 }
 
+TEST_F(ParseTreeTest, OperatorFixity) {
+  TokenizedBuffer tokens = GetTokenizedBuffer(
+      "fn F(p: Int*, n: Int) {\n"
+      "  var q: Int* = p;\n"
+      "  var t: Type = Int*;\n"
+      "  t = t**;\n"
+      "  n = n * n;\n"
+      "  n = n*n;\n"
+      "  G(Int*, n * n);\n"
+      "}");
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
+  EXPECT_FALSE(tree.HasErrors());
+
+  EXPECT_THAT(
+      tree,
+      MatchParseTreeNodes(
+          {MatchFunctionDeclaration(
+               MatchDeclaredName("F"),
+               MatchParameters(
+                   MatchPatternBinding(
+                       MatchDeclaredName("p"),
+                       MatchPostfixOperator(MatchNameReference("Int"), "*")),
+                   MatchParameterListComma(),
+                   MatchPatternBinding(MatchDeclaredName("n"),
+                                       MatchNameReference("Int"))),
+               MatchCodeBlock(
+                   MatchVariableDeclaration(
+                       MatchPatternBinding(MatchDeclaredName("q"),
+                                           MatchPostfixOperator(
+                                               MatchNameReference("Int"), "*")),
+                       MatchVariableInitializer(MatchNameReference("p")),
+                       MatchDeclarationEnd()),
+                   MatchVariableDeclaration(
+                       MatchPatternBinding(MatchDeclaredName("t"),
+                                           MatchNameReference("Type")),
+                       MatchVariableInitializer(MatchPostfixOperator(
+                           MatchNameReference("Int"), "*")),
+                       MatchDeclarationEnd()),
+                   MatchExpressionStatement(MatchInfixOperator(
+                       MatchNameReference("t"), "=",
+                       MatchPostfixOperator(
+                           MatchPostfixOperator(MatchNameReference("t"), "*"),
+                           "*"))),
+                   MatchExpressionStatement(MatchInfixOperator(
+                       MatchNameReference("n"), "=",
+                       MatchInfixOperator(MatchNameReference("n"), "*",
+                                          MatchNameReference("n")))),
+                   MatchExpressionStatement(MatchInfixOperator(
+                       MatchNameReference("n"), "=",
+                       MatchInfixOperator(MatchNameReference("n"), "*",
+                                          MatchNameReference("n")))),
+                   MatchExpressionStatement(MatchCallExpression(
+                       MatchNameReference("G"),
+                       MatchPostfixOperator(MatchNameReference("Int"), "*"),
+                       MatchCallExpressionComma(),
+                       MatchInfixOperator(MatchNameReference("n"), "*",
+                                          MatchNameReference("n")),
+                       MatchCallExpressionEnd())),
+                   MatchCodeBlockEnd())),
+           MatchFileEnd()}));
+}
+
+TEST_F(ParseTreeTest, OperatorWhitespaceErrors) {
+  // Test dispositions: Recovered means we issued an error but recovered a
+  // proper parse tree; Failed means we didn't fully recover from the error.
+  enum Kind { Valid, Recovered, Failed };
+
+  struct Testcase {
+    const char* input;
+    Kind kind;
+  } testcases[] = {
+      {"var v: Type = Int*;", Valid},
+      {"var v: Type = Int *;", Recovered},
+      {"var v: Type = Int* ;", Valid},
+      {"var v: Type = Int * ;", Recovered},
+      {"var n: Int = n * n;", Valid},
+      {"var n: Int = n*n;", Valid},
+      {"var n: Int = (n)*3;", Valid},
+      {"var n: Int = 3*(n);", Valid},
+      {"var n: Int = n *n;", Recovered},
+      // FIXME: We could figure out that this first Failed example is infix
+      // with one-token lookahead.
+      {"var n: Int = n* n;", Failed},
+      {"var n: Int = n* -n;", Failed},
+      {"var n: Int = n* *p;", Failed},
+      // FIXME: We try to form (n*)*p and reject due to missing parentheses
+      // before we notice the missing whitespace around the second `*`.
+      // It'd be better to (somehow) form n*(*p) and reject due to the missing
+      // whitespace around the first `*`.
+      {"var n: Int = n**p;", Failed},
+      {"var n: Int = -n;", Valid},
+      {"var n: Int = - n;", Recovered},
+      {"var n: Int =-n;", Valid},
+      {"var n: Int =- n;", Recovered},
+      {"var n: Int = F(Int *);", Recovered},
+      {"var n: Int = F(Int *, 0);", Recovered},
+  };
+
+  for (auto [input, kind] : testcases) {
+    TokenizedBuffer tokens = GetTokenizedBuffer(input);
+    ErrorTrackingDiagnosticConsumer error_tracker(consumer);
+    ParseTree tree = ParseTree::Parse(tokens, error_tracker);
+    EXPECT_THAT(tree.HasErrors(), Eq(kind == Failed)) << input;
+    EXPECT_THAT(error_tracker.SeenError(), Eq(kind != Valid)) << input;
+  }
+}
+
 TEST_F(ParseTreeTest, VariableDeclarations) {
   TokenizedBuffer tokens = GetTokenizedBuffer(
       "var v: Int = 0;\n"

+ 169 - 2
toolchain/parser/parser_impl.cpp

@@ -116,6 +116,54 @@ struct UnexpectedTokenAfterListElement
   static constexpr llvm::StringLiteral Message = "Expected `,` or `)`.";
 };
 
+struct BinaryOperatorRequiresWhitespace
+    : SimpleDiagnostic<BinaryOperatorRequiresWhitespace> {
+  static constexpr llvm::StringLiteral ShortName = "syntax-error";
+  static constexpr const char* Message =
+      "Whitespace missing {0} binary operator.";
+
+  bool has_leading_space;
+  bool has_trailing_space;
+
+  auto Format() -> std::string {
+    const char* where = "around";
+    // clang-format off
+    if (has_leading_space) {
+      where = "after";
+    } else if (has_trailing_space) {
+      where = "before";
+    }
+    // clang-format on
+    return llvm::formatv(Message, where);
+  }
+};
+
+struct UnaryOperatorHasWhitespace
+    : SimpleDiagnostic<UnaryOperatorHasWhitespace> {
+  static constexpr llvm::StringLiteral ShortName = "syntax-error";
+  static constexpr const char* Message =
+      "Whitespace is not allowed {0} this unary operator.";
+
+  bool prefix;
+
+  auto Format() -> std::string {
+    return llvm::formatv(Message, prefix ? "after" : "before");
+  }
+};
+
+struct UnaryOperatorRequiresWhitespace
+    : SimpleDiagnostic<UnaryOperatorRequiresWhitespace> {
+  static constexpr llvm::StringLiteral ShortName = "syntax-error";
+  static constexpr const char* Message =
+      "Whitespace is required {0} this unary operator.";
+
+  bool prefix;
+
+  auto Format() -> std::string {
+    return llvm::formatv(Message, prefix ? "before" : "after");
+  }
+};
+
 struct OperatorRequiresParentheses
     : SimpleDiagnostic<OperatorRequiresParentheses> {
   static constexpr llvm::StringLiteral ShortName = "syntax-error";
@@ -540,6 +588,7 @@ auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
   auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
                                       ParseNodeKind::DeclarationEnd());
   if (!semi) {
+    emitter.EmitError<ExpectedSemiAfterExpression>(*position);
     SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
       return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
     });
@@ -685,6 +734,114 @@ auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
   }
 }
 
+// Determines whether the given token is considered to be the start of an
+// operand according to the rules for infix operator parsing.
+static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
+  return kind.IsOneOf({TokenKind::OpenParen(), TokenKind::Identifier(),
+                       TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
+                       TokenKind::StringLiteral()});
+}
+
+// Determines whether the given token is considered to be the end of an operand
+// according to the rules for infix operator parsing.
+static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
+  return kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
+                       TokenKind::CloseSquareBracket(), TokenKind::Identifier(),
+                       TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
+                       TokenKind::StringLiteral()});
+}
+
+// Determines whether the given token could possibly be the start of an operand.
+// This is conservatively correct, and will never incorrectly return `false`,
+// but can incorrectly return `true`.
+static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
+  return !kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
+                        TokenKind::CloseSquareBracket(), TokenKind::Comma(),
+                        TokenKind::Semi(), TokenKind::Colon()});
+}
+
+auto ParseTree::Parser::IsLexicallyValidInfixOperator() -> bool {
+  assert(!AtEndOfFile() && "Expected an operator token.");
+
+  bool leading_space = tokens.HasLeadingWhitespace(*position);
+  bool trailing_space = tokens.HasTrailingWhitespace(*position);
+
+  // If there's whitespace on both sides, it's an infix operator.
+  if (leading_space && trailing_space) {
+    return true;
+  }
+
+  // If there's whitespace on exactly one side, it's not an infix operator.
+  if (leading_space || trailing_space) {
+    return false;
+  }
+
+  // Otherwise, for an infix operator, the preceding token must be any close
+  // bracket, identifier, or literal and the next token must be an open paren,
+  // identifier, or literal.
+  if (position == tokens.Tokens().begin() ||
+      !IsAssumedEndOfOperand(tokens.GetKind(*(position - 1))) ||
+      !IsAssumedStartOfOperand(tokens.GetKind(*(position + 1)))) {
+    return false;
+  }
+
+  return true;
+}
+
+auto ParseTree::Parser::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
+  bool is_valid_as_infix = IsLexicallyValidInfixOperator();
+
+  if (fixity == OperatorFixity::Infix) {
+    // Infix operators must satisfy the infix operator rules.
+    if (!is_valid_as_infix) {
+      emitter.EmitError<BinaryOperatorRequiresWhitespace>(
+          *position,
+          {.has_leading_space = tokens.HasLeadingWhitespace(*position),
+           .has_trailing_space = tokens.HasTrailingWhitespace(*position)});
+    }
+  } else {
+    bool prefix = fixity == OperatorFixity::Prefix;
+
+    // Whitespace is not permitted between a symbolic pre/postfix operator and
+    // its operand.
+    if (NextTokenKind().IsSymbol() &&
+        (prefix ? tokens.HasTrailingWhitespace(*position)
+                : tokens.HasLeadingWhitespace(*position))) {
+      emitter.EmitError<UnaryOperatorHasWhitespace>(*position,
+                                                    {.prefix = prefix});
+    }
+    // Pre/postfix operators must not satisfy the infix operator rules.
+    if (is_valid_as_infix) {
+      emitter.EmitError<UnaryOperatorRequiresWhitespace>(*position,
+                                                         {.prefix = prefix});
+    }
+  }
+}
+
+auto ParseTree::Parser::IsTrailingOperatorInfix() -> bool {
+  if (AtEndOfFile()) {
+    return false;
+  }
+
+  // An operator that follows the infix operator rules is parsed as
+  // infix, unless the next token means that it can't possibly be.
+  if (IsLexicallyValidInfixOperator() &&
+      IsPossibleStartOfOperand(tokens.GetKind(*(position + 1)))) {
+    return true;
+  }
+
+  // A trailing operator with leading whitespace that's not valid as infix is
+  // not valid at all. If the next token looks like the start of an operand,
+  // then parse as infix, otherwise as postfix. Either way we'll produce a
+  // diagnostic later on.
+  if (tokens.HasLeadingWhitespace(*position) &&
+      IsAssumedStartOfOperand(tokens.GetKind(*(position + 1)))) {
+    return true;
+  }
+
+  return false;
+}
+
 auto ParseTree::Parser::ParseOperatorExpression(
     PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
   auto start = GetSubtreeStartPosition();
@@ -703,6 +860,9 @@ auto ParseTree::Parser::ParseOperatorExpression(
       // The precedence rules don't permit this prefix operator in this
       // context. Diagnose this, but carry on and parse it anyway.
       emitter.EmitError<OperatorRequiresParentheses>(*position);
+    } else {
+      // Check that this operator follows the proper whitespace rules.
+      DiagnoseOperatorFixity(OperatorFixity::Prefix);
     }
 
     auto operator_token = Consume(NextTokenKind());
@@ -713,9 +873,13 @@ auto ParseTree::Parser::ParseOperatorExpression(
   }
 
   // Consume a sequence of infix and postfix operators.
-  while (auto trailing_operator =
-             PrecedenceGroup::ForTrailing(NextTokenKind())) {
+  while (auto trailing_operator = PrecedenceGroup::ForTrailing(
+             NextTokenKind(), IsTrailingOperatorInfix())) {
     auto [operator_precedence, is_binary] = *trailing_operator;
+
+    // FIXME: If this operator is ambiguous with either the ambient precedence
+    // or the LHS precedence, and there's a variant with a different fixity
+    // that would work, use that one instead for error recovery.
     if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
         OperatorPriority::RightFirst) {
       // The precedence rules don't permit this operator in this context. Try
@@ -730,6 +894,9 @@ auto ParseTree::Parser::ParseOperatorExpression(
       // this operator. Either way, parentheses are required.
       emitter.EmitError<OperatorRequiresParentheses>(*position);
       lhs = llvm::None;
+    } else {
+      DiagnoseOperatorFixity(is_binary ? OperatorFixity::Infix
+                                       : OperatorFixity::Postfix);
     }
 
     auto operator_token = Consume(NextTokenKind());

+ 14 - 0
toolchain/parser/parser_impl.h

@@ -191,6 +191,20 @@ class ParseTree::Parser {
   // -   designators
   auto ParsePostfixExpression() -> llvm::Optional<Node>;
 
+  enum class OperatorFixity { Prefix, Infix, Postfix };
+
+  // Determines whether the current token satisfies the lexical validity rules
+  // for an infix operator.
+  auto IsLexicallyValidInfixOperator() -> bool;
+
+  // Diagnoses if the current token is not written properly for the given
+  // fixity, for example because mandatory whitespace is missing.
+  auto DiagnoseOperatorFixity(OperatorFixity fixity) -> void;
+
+  // Determines whether the current trailing operator should be treated as
+  // infix.
+  auto IsTrailingOperatorInfix() -> bool;
+
   // Parses an expression involving operators, in a context with the given
   // precedence.
   auto ParseOperatorExpression(PrecedenceGroup precedence)

+ 15 - 17
toolchain/parser/precedence.cpp

@@ -24,6 +24,10 @@ enum PrecedenceLevel : int8_t {
   BitwiseOr,
   BitwiseXor,
   BitShift,
+  // Type formation.
+  TypePostfix,
+  // Sentinel representing a type context.
+  Type,
   // Logical.
   LogicalPrefix,
   Relational,
@@ -32,8 +36,6 @@ enum PrecedenceLevel : int8_t {
   // Assignment.
   SimpleAssignment,
   CompoundAssignment,
-  // Sentinel representing a type context.
-  Type,
   // Sentinel representing a context in which any operator can appear.
   Lowest,
 };
@@ -46,30 +48,21 @@ struct OperatorPriorityTable {
     // Start with a list of <higher precedence>, <lower precedence>
     // relationships.
     MarkHigherThan({Highest}, {NumericPrefix, BitwisePrefix, LogicalPrefix,
-                               NumericPostfix});
+                               NumericPostfix, TypePostfix});
     MarkHigherThan({NumericPrefix, NumericPostfix},
                    {Modulo, Multiplicative, BitShift});
     MarkHigherThan({Multiplicative}, {Additive});
     MarkHigherThan({BitwisePrefix},
                    {BitwiseAnd, BitwiseOr, BitwiseXor, BitShift});
+    MarkHigherThan({TypePostfix}, {Type});
     MarkHigherThan(
-        {Modulo, Additive, BitwiseAnd, BitwiseOr, BitwiseXor, BitShift},
+        {Modulo, Additive, BitwiseAnd, BitwiseOr, BitwiseXor, BitShift, Type},
         {SimpleAssignment, CompoundAssignment, Relational});
     MarkHigherThan({Relational, LogicalPrefix}, {LogicalAnd, LogicalOr});
     MarkHigherThan(
         {SimpleAssignment, CompoundAssignment, LogicalAnd, LogicalOr},
         {Lowest});
 
-    // FIXME: Decide upon a precedence level to use for types. It's important
-    // that this is no higher than simple assignment, otherwise
-    //   var x: T = y;
-    // would be parsed as
-    //   var x: (T = y);
-    // For now, we have no type operators and no operator overloading, so we
-    // only parse primary expressions in types.
-    MarkHigherThan({Highest}, {Type});
-    MarkHigherThan({Type}, {Lowest});
-
     // Compute the transitive closure of the above relationships: if we parse
     // `a $ b @ c` as `(a $ b) @ c` and parse `b @ c % d` as `(b @ c) % d`,
     // then we will parse `a $ b @ c % d` as `((a $ b) @ c) % d` and should
@@ -144,7 +137,7 @@ struct OperatorPriorityTable {
     }
 
     // Postfix operators are symmetric with prefix operators.
-    for (PrecedenceLevel postfix : {NumericPostfix}) {
+    for (PrecedenceLevel postfix : {NumericPostfix, TypePostfix}) {
       table[postfix][postfix] = OperatorPriority::LeftFirst;
     }
 
@@ -216,7 +209,8 @@ auto PrecedenceGroup::ForLeading(TokenKind kind)
   }
 }
 
-auto PrecedenceGroup::ForTrailing(TokenKind kind) -> llvm::Optional<Trailing> {
+auto PrecedenceGroup::ForTrailing(TokenKind kind, bool infix)
+    -> llvm::Optional<Trailing> {
   switch (kind) {
     // Assignment operators.
     case TokenKind::Equal():
@@ -265,12 +259,16 @@ auto PrecedenceGroup::ForTrailing(TokenKind kind) -> llvm::Optional<Trailing> {
       return Trailing{.level = Additive, .is_binary = true};
 
     // Multiplicative operators.
-    case TokenKind::Star():
     case TokenKind::Slash():
       return Trailing{.level = Multiplicative, .is_binary = true};
     case TokenKind::Percent():
       return Trailing{.level = Modulo, .is_binary = true};
 
+    // `*` could be multiplication or pointer type formation.
+    case TokenKind::Star():
+      return infix ? Trailing{.level = Multiplicative, .is_binary = true}
+                   : Trailing{.level = TypePostfix, .is_binary = false};
+
     // Postfix operators.
     case TokenKind::MinusMinus():
     case TokenKind::PlusPlus():

+ 5 - 2
toolchain/parser/precedence.h

@@ -57,8 +57,11 @@ class PrecedenceGroup {
 
   // Look up the operator information of the given infix or postfix operator
   // token, or return llvm::None if the given token is not an infix or postfix
-  // operator.
-  static auto ForTrailing(TokenKind kind) -> llvm::Optional<Trailing>;
+  // operator. `infix` indicates whether this is a valid infix operator, but is
+  // only considered if the same operator symbol is available as both infix and
+  // postfix.
+  static auto ForTrailing(TokenKind kind, bool infix)
+      -> llvm::Optional<Trailing>;
 
   friend auto operator==(PrecedenceGroup lhs, PrecedenceGroup rhs) -> bool {
     return lhs.level == rhs.level;

+ 81 - 40
toolchain/parser/precedence_test.cpp

@@ -20,71 +20,111 @@ TEST(PrecedenceTest, OperatorsAreRecognized) {
   EXPECT_FALSE(PrecedenceGroup::ForLeading(TokenKind::Slash()).hasValue());
   EXPECT_FALSE(PrecedenceGroup::ForLeading(TokenKind::Identifier()).hasValue());
 
-  EXPECT_TRUE(PrecedenceGroup::ForTrailing(TokenKind::Minus()).hasValue());
-  EXPECT_FALSE(PrecedenceGroup::ForTrailing(TokenKind::Tilde()).hasValue());
-  EXPECT_TRUE(PrecedenceGroup::ForTrailing(TokenKind::Slash()).hasValue());
+  EXPECT_TRUE(
+      PrecedenceGroup::ForTrailing(TokenKind::Minus(), false).hasValue());
   EXPECT_FALSE(
-      PrecedenceGroup::ForTrailing(TokenKind::Identifier()).hasValue());
+      PrecedenceGroup::ForTrailing(TokenKind::Tilde(), false).hasValue());
+  EXPECT_TRUE(
+      PrecedenceGroup::ForTrailing(TokenKind::Slash(), true).hasValue());
+  EXPECT_FALSE(
+      PrecedenceGroup::ForTrailing(TokenKind::Identifier(), false).hasValue());
+
+  EXPECT_TRUE(
+      PrecedenceGroup::ForTrailing(TokenKind::Minus(), true)->is_binary);
+  EXPECT_FALSE(
+      PrecedenceGroup::ForTrailing(TokenKind::MinusMinus(), false)->is_binary);
+}
 
-  EXPECT_TRUE(PrecedenceGroup::ForTrailing(TokenKind::Minus())->is_binary);
+TEST(PrecedenceTest, InfixVsPostfix) {
+  // A trailing `-` is always infix; a trailing `--` is always postfix.
+  EXPECT_TRUE(
+      PrecedenceGroup::ForTrailing(TokenKind::Minus(), false)->is_binary);
   EXPECT_FALSE(
-      PrecedenceGroup::ForTrailing(TokenKind::MinusMinus())->is_binary);
+      PrecedenceGroup::ForTrailing(TokenKind::MinusMinus(), true)->is_binary);
+
+  // A trailing `*` is interpreted based on context.
+  EXPECT_TRUE(PrecedenceGroup::ForTrailing(TokenKind::Star(), true)->is_binary);
+  EXPECT_FALSE(
+      PrecedenceGroup::ForTrailing(TokenKind::Star(), false)->is_binary);
+
+  // Infix `*` can appear in type contexts; binary `*` cannot.
+  EXPECT_THAT(PrecedenceGroup::GetPriority(
+                  PrecedenceGroup::ForTrailing(TokenKind::Star(), true)->level,
+                  PrecedenceGroup::ForType()),
+              Eq(OperatorPriority::Ambiguous));
+  EXPECT_THAT(PrecedenceGroup::GetPriority(
+                  PrecedenceGroup::ForTrailing(TokenKind::Star(), false)->level,
+                  PrecedenceGroup::ForType()),
+              Eq(OperatorPriority::LeftFirst));
+
+  // Binary `*` can appear in `+` contexts; binary `*` cannot.
+  EXPECT_THAT(PrecedenceGroup::GetPriority(
+                  PrecedenceGroup::ForTrailing(TokenKind::Star(), true)->level,
+                  PrecedenceGroup::ForTrailing(TokenKind::Plus(), true)->level),
+              Eq(OperatorPriority::LeftFirst));
+  EXPECT_THAT(PrecedenceGroup::GetPriority(
+                  PrecedenceGroup::ForTrailing(TokenKind::Star(), false)->level,
+                  PrecedenceGroup::ForTrailing(TokenKind::Plus(), true)->level),
+              Eq(OperatorPriority::Ambiguous));
 }
 
 TEST(PrecedenceTest, Associativity) {
   EXPECT_THAT(
       PrecedenceGroup::ForLeading(TokenKind::Minus())->GetAssociativity(),
       Eq(Associativity::RightToLeft));
-  EXPECT_THAT(PrecedenceGroup::ForTrailing(TokenKind::PlusPlus())
+  EXPECT_THAT(PrecedenceGroup::ForTrailing(TokenKind::PlusPlus(), false)
                   ->level.GetAssociativity(),
               Eq(Associativity::LeftToRight));
-  EXPECT_THAT(
-      PrecedenceGroup::ForTrailing(TokenKind::Plus())->level.GetAssociativity(),
-      Eq(Associativity::LeftToRight));
-  EXPECT_THAT(PrecedenceGroup::ForTrailing(TokenKind::Equal())
+  EXPECT_THAT(PrecedenceGroup::ForTrailing(TokenKind::Plus(), true)
+                  ->level.GetAssociativity(),
+              Eq(Associativity::LeftToRight));
+  EXPECT_THAT(PrecedenceGroup::ForTrailing(TokenKind::Equal(), true)
                   ->level.GetAssociativity(),
               Eq(Associativity::RightToLeft));
-  EXPECT_THAT(PrecedenceGroup::ForTrailing(TokenKind::PlusEqual())
+  EXPECT_THAT(PrecedenceGroup::ForTrailing(TokenKind::PlusEqual(), true)
                   ->level.GetAssociativity(),
               Eq(Associativity::None));
 }
 
 TEST(PrecedenceTest, DirectRelations) {
   EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  PrecedenceGroup::ForTrailing(TokenKind::Star())->level,
-                  PrecedenceGroup::ForTrailing(TokenKind::Plus())->level),
+                  PrecedenceGroup::ForTrailing(TokenKind::Star(), true)->level,
+                  PrecedenceGroup::ForTrailing(TokenKind::Plus(), true)->level),
               Eq(OperatorPriority::LeftFirst));
   EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  PrecedenceGroup::ForTrailing(TokenKind::Plus())->level,
-                  PrecedenceGroup::ForTrailing(TokenKind::Star())->level),
+                  PrecedenceGroup::ForTrailing(TokenKind::Plus(), true)->level,
+                  PrecedenceGroup::ForTrailing(TokenKind::Star(), true)->level),
               Eq(OperatorPriority::RightFirst));
 
   EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  PrecedenceGroup::ForTrailing(TokenKind::Amp())->level,
-                  PrecedenceGroup::ForTrailing(TokenKind::Less())->level),
+                  PrecedenceGroup::ForTrailing(TokenKind::Amp(), true)->level,
+                  PrecedenceGroup::ForTrailing(TokenKind::Less(), true)->level),
               Eq(OperatorPriority::LeftFirst));
   EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  PrecedenceGroup::ForTrailing(TokenKind::Less())->level,
-                  PrecedenceGroup::ForTrailing(TokenKind::Amp())->level),
+                  PrecedenceGroup::ForTrailing(TokenKind::Less(), true)->level,
+                  PrecedenceGroup::ForTrailing(TokenKind::Amp(), true)->level),
               Eq(OperatorPriority::RightFirst));
 }
 
 TEST(PrecedenceTest, IndirectRelations) {
-  EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  PrecedenceGroup::ForTrailing(TokenKind::Star())->level,
-                  PrecedenceGroup::ForTrailing(TokenKind::OrKeyword())->level),
-              Eq(OperatorPriority::LeftFirst));
-  EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  PrecedenceGroup::ForTrailing(TokenKind::OrKeyword())->level,
-                  PrecedenceGroup::ForTrailing(TokenKind::Star())->level),
-              Eq(OperatorPriority::RightFirst));
+  EXPECT_THAT(
+      PrecedenceGroup::GetPriority(
+          PrecedenceGroup::ForTrailing(TokenKind::Star(), true)->level,
+          PrecedenceGroup::ForTrailing(TokenKind::OrKeyword(), true)->level),
+      Eq(OperatorPriority::LeftFirst));
+  EXPECT_THAT(
+      PrecedenceGroup::GetPriority(
+          PrecedenceGroup::ForTrailing(TokenKind::OrKeyword(), true)->level,
+          PrecedenceGroup::ForTrailing(TokenKind::Star(), true)->level),
+      Eq(OperatorPriority::RightFirst));
 
+  EXPECT_THAT(
+      PrecedenceGroup::GetPriority(
+          *PrecedenceGroup::ForLeading(TokenKind::Tilde()),
+          PrecedenceGroup::ForTrailing(TokenKind::Equal(), true)->level),
+      Eq(OperatorPriority::LeftFirst));
   EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  *PrecedenceGroup::ForLeading(TokenKind::Tilde()),
-                  PrecedenceGroup::ForTrailing(TokenKind::Equal())->level),
-              Eq(OperatorPriority::LeftFirst));
-  EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  PrecedenceGroup::ForTrailing(TokenKind::Equal())->level,
+                  PrecedenceGroup::ForTrailing(TokenKind::Equal(), true)->level,
                   *PrecedenceGroup::ForLeading(TokenKind::Tilde())),
               Eq(OperatorPriority::RightFirst));
 }
@@ -100,15 +140,16 @@ TEST(PrecedenceTest, IncomparableOperators) {
               Eq(OperatorPriority::Ambiguous));
   EXPECT_THAT(PrecedenceGroup::GetPriority(
                   *PrecedenceGroup::ForLeading(TokenKind::Minus()),
-                  PrecedenceGroup::ForTrailing(TokenKind::Amp())->level),
-              Eq(OperatorPriority::Ambiguous));
-  EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  PrecedenceGroup::ForTrailing(TokenKind::Equal())->level,
-                  PrecedenceGroup::ForTrailing(TokenKind::PipeEqual())->level),
+                  PrecedenceGroup::ForTrailing(TokenKind::Amp(), true)->level),
               Eq(OperatorPriority::Ambiguous));
+  EXPECT_THAT(
+      PrecedenceGroup::GetPriority(
+          PrecedenceGroup::ForTrailing(TokenKind::Equal(), true)->level,
+          PrecedenceGroup::ForTrailing(TokenKind::PipeEqual(), true)->level),
+      Eq(OperatorPriority::Ambiguous));
   EXPECT_THAT(PrecedenceGroup::GetPriority(
-                  PrecedenceGroup::ForTrailing(TokenKind::Plus())->level,
-                  PrecedenceGroup::ForTrailing(TokenKind::Amp())->level),
+                  PrecedenceGroup::ForTrailing(TokenKind::Plus(), true)->level,
+                  PrecedenceGroup::ForTrailing(TokenKind::Amp(), true)->level),
               Eq(OperatorPriority::Ambiguous));
 }