Просмотр исходного кода

Improve diagnosis of mismatched brackets. (#3282)

Move handling of mismatched brackets out of the main lexing loop into a
separate pass that is only run if there are mismatched brackets. This is
done in preparation for using both lookahead and lookbehind to work out
how to match brackets, and to get this code far away from the hot lexing
loop.

Fix bracket insertion location to be immediately after the token that
we're inserting the bracket after, rather than potentially at the end of
a comment. When there are open brackets at the end of the file, say that
there are open brackets, not that there's a closing bracket without a
matching opening bracket.
Richard Smith 2 лет назад
Родитель
Commit
0a06fceb5f

+ 1 - 1
toolchain/diagnostics/diagnostic_kind.def

@@ -38,7 +38,6 @@ CARBON_DIAGNOSTIC_KIND(InvalidDigit)
 CARBON_DIAGNOSTIC_KIND(InvalidDigitSeparator)
 CARBON_DIAGNOSTIC_KIND(InvalidHorizontalWhitespaceInString)
 CARBON_DIAGNOSTIC_KIND(IrregularDigitSeparators)
-CARBON_DIAGNOSTIC_KIND(MismatchedClosing)
 CARBON_DIAGNOSTIC_KIND(MismatchedIndentInString)
 CARBON_DIAGNOSTIC_KIND(MultiLineStringWithDoubleQuotes)
 CARBON_DIAGNOSTIC_KIND(NoWhitespaceAfterCommentIntroducer)
@@ -49,6 +48,7 @@ CARBON_DIAGNOSTIC_KIND(UnicodeEscapeSurrogate)
 CARBON_DIAGNOSTIC_KIND(UnicodeEscapeTooLarge)
 CARBON_DIAGNOSTIC_KIND(UnknownBaseSpecifier)
 CARBON_DIAGNOSTIC_KIND(UnknownEscapeSequence)
+CARBON_DIAGNOSTIC_KIND(UnmatchedOpening)
 CARBON_DIAGNOSTIC_KIND(UnmatchedClosing)
 CARBON_DIAGNOSTIC_KIND(UnrecognizedCharacters)
 CARBON_DIAGNOSTIC_KIND(UnterminatedString)

+ 1 - 1
toolchain/driver/testdata/fail_errors_in_two_files.carbon

@@ -8,7 +8,7 @@
 
 // --- file1.carbon
 
-// CHECK:STDERR: file1.carbon:[[@LINE+3]]:24: ERROR: Closing symbol does not match most recent opening symbol.
+// CHECK:STDERR: file1.carbon:[[@LINE+3]]:24: ERROR: Opening symbol without a corresponding closing symbol.
 // CHECK:STDERR: fn run(String program) {
 // CHECK:STDERR:                        ^
 fn run(String program) {

+ 1 - 1
toolchain/driver/testdata/fail_errors_sorted.carbon

@@ -6,7 +6,7 @@
 //
 // AUTOUPDATE
 
-// CHECK:STDERR: fail_errors_sorted.carbon:[[@LINE+3]]:24: ERROR: Closing symbol does not match most recent opening symbol.
+// CHECK:STDERR: fail_errors_sorted.carbon:[[@LINE+3]]:24: ERROR: Opening symbol without a corresponding closing symbol.
 // CHECK:STDERR: fn run(String program) {
 // CHECK:STDERR:                        ^
 fn run(String program) {

+ 1 - 1
toolchain/driver/testdata/fail_errors_streamed.carbon

@@ -12,7 +12,7 @@ fn run(String program) {
 // CHECK:STDERR: fail_errors_streamed.carbon:[[@LINE+6]]:10: ERROR: Invalid digit 'a' in decimal numeric literal.
 // CHECK:STDERR: var x = 3a;
 // CHECK:STDERR:          ^
-// CHECK:STDERR: fail_errors_streamed.carbon:[[@LINE-6]]:24: ERROR: Closing symbol does not match most recent opening symbol.
+// CHECK:STDERR: fail_errors_streamed.carbon:[[@LINE-6]]:24: ERROR: Opening symbol without a corresponding closing symbol.
 // CHECK:STDERR: fn run(String program) {
 // CHECK:STDERR:                        ^
 var x = 3a;

+ 206 - 85
toolchain/lex/lex.cpp

@@ -140,15 +140,6 @@ class [[clang::internal_linkage]] Lexer {
   // literal and if so form the corresponding token.
   auto LexWordAsTypeLiteralToken(llvm::StringRef word, int column) -> LexResult;
 
-  // Closes all open groups that cannot remain open across a closing symbol.
-  // Users may pass `Error` to close all open groups.
-  //
-  // Explicitly kept out-of-line because it's on an error path, and so inlining
-  // would be performance neutral. Keeping it out-of-line makes the generated
-  // code easier to understand when profiling.
-  [[gnu::noinline]] auto CloseInvalidOpenGroups(TokenKind kind,
-                                                ssize_t position) -> void;
-
   auto LexKeywordOrIdentifier(llvm::StringRef source_text, ssize_t& position)
       -> LexResult;
 
@@ -161,16 +152,21 @@ class [[clang::internal_linkage]] Lexer {
 
   auto LexFileEnd(llvm::StringRef source_text, ssize_t position) -> void;
 
+  auto DiagnoseAndFixMismatchedBrackets() -> void;
+
   // The main entry point for dispatching through the lexer's table. This method
   // should always fully consume the source text.
   auto Lex() && -> TokenizedBuffer;
 
  private:
+  class ErrorRecoveryBuffer;
+
   TokenizedBuffer buffer_;
 
   ssize_t line_index_;
 
   llvm::SmallVector<TokenIndex> open_groups_;
+  bool has_mismatched_brackets_ = false;
 
   ErrorTrackingDiagnosticConsumer consumer_;
 
@@ -997,46 +993,26 @@ auto Lexer::LexOpeningSymbolToken(llvm::StringRef source_text, TokenKind kind,
 
 auto Lexer::LexClosingSymbolToken(llvm::StringRef source_text, TokenKind kind,
                                   ssize_t& position) -> LexResult {
-  auto unmatched_error = [&] {
-    CARBON_DIAGNOSTIC(UnmatchedClosing, Error,
-                      "Closing symbol without a corresponding opening symbol.");
-    emitter_.Emit(source_text.begin() + position, UnmatchedClosing);
-    TokenIndex token = buffer_.AddToken({.kind = TokenKind::Error,
-                                         .token_line = current_line(),
-                                         .column = ComputeColumn(position),
-                                         .error_length = 1});
-    ++position;
-    return token;
-  };
+  TokenIndex token = LexOneCharSymbolToken(source_text, kind, position);
+  auto& token_info = buffer_.GetTokenInfo(token);
 
-  // If we have no open groups, this is an error.
+  // If there's not a matching opening symbol, just track that we had an error.
+  // We will diagnose and recover when we reach the end of the file. See
+  // `DiagnoseAndFixMismatchedBrackets` for details.
   if (LLVM_UNLIKELY(open_groups_.empty())) {
-    return unmatched_error();
+    has_mismatched_brackets_ = true;
+    return token;
   }
 
-  TokenIndex opening_token = open_groups_.back();
-  // Close any invalid open groups first.
-  if (LLVM_UNLIKELY(buffer_.GetTokenInfo(opening_token).kind !=
-                    kind.opening_symbol())) {
-    CloseInvalidOpenGroups(kind, position);
-    // This may exhaust the open groups so re-check and re-error if needed.
-    if (open_groups_.empty()) {
-      return unmatched_error();
-    }
-    opening_token = open_groups_.back();
-    CARBON_DCHECK(buffer_.GetTokenInfo(opening_token).kind ==
-                  kind.opening_symbol());
+  TokenIndex opening_token = open_groups_.pop_back_val();
+  auto& opening_token_info = buffer_.GetTokenInfo(opening_token);
+  if (LLVM_UNLIKELY(opening_token_info.kind != kind.opening_symbol())) {
+    has_mismatched_brackets_ = true;
+    return token;
   }
-  open_groups_.pop_back();
-
-  // Now that the groups are all matched up, lex the actual token.
-  TokenIndex token = LexOneCharSymbolToken(source_text, kind, position);
-
-  // Note that it is important to get fresh token infos here as lexing the
-  // open token would invalidate any pointers.
-  buffer_.GetTokenInfo(opening_token).closing_token = token;
-  buffer_.GetTokenInfo(token).opening_token = opening_token;
 
+  opening_token_info.closing_token = token;
+  token_info.opening_token = opening_token;
   return token;
 }
 
@@ -1109,42 +1085,6 @@ auto Lexer::LexWordAsTypeLiteralToken(llvm::StringRef word, int column)
   return token;
 }
 
-auto Lexer::CloseInvalidOpenGroups(TokenKind kind, ssize_t position) -> void {
-  CARBON_CHECK(kind.is_closing_symbol() || kind == TokenKind::Error);
-  CARBON_CHECK(!open_groups_.empty());
-
-  int column = ComputeColumn(position);
-
-  do {
-    TokenIndex opening_token = open_groups_.back();
-    TokenKind opening_kind = buffer_.GetTokenInfo(opening_token).kind;
-    if (kind == opening_kind.closing_symbol()) {
-      return;
-    }
-
-    open_groups_.pop_back();
-    CARBON_DIAGNOSTIC(
-        MismatchedClosing, Error,
-        "Closing symbol does not match most recent opening symbol.");
-    token_emitter_.Emit(opening_token, MismatchedClosing);
-
-    CARBON_CHECK(!buffer_.tokens().empty())
-        << "Must have a prior opening token!";
-    TokenIndex prev_token = buffer_.tokens().end()[-1];
-
-    // TODO: do a smarter backwards scan for where to put the closing
-    // token.
-    TokenIndex closing_token = buffer_.AddToken(
-        {.kind = opening_kind.closing_symbol(),
-         .has_trailing_space = buffer_.HasTrailingWhitespace(prev_token),
-         .is_recovery = true,
-         .token_line = current_line(),
-         .column = column});
-    buffer_.GetTokenInfo(opening_token).closing_token = closing_token;
-    buffer_.GetTokenInfo(closing_token).opening_token = opening_token;
-  } while (!open_groups_.empty());
-}
-
 auto Lexer::LexKeywordOrIdentifier(llvm::StringRef source_text,
                                    ssize_t& position) -> LexResult {
   if (static_cast<unsigned char>(source_text[position]) > 0x7F) {
@@ -1297,15 +1237,196 @@ auto Lexer::LexFileEnd(llvm::StringRef source_text, ssize_t position) -> void {
   // The end-of-file token is always considered to be whitespace.
   NoteWhitespace();
 
-  // Close any open groups. We do this after marking whitespace, it will
-  // preserve that.
-  if (!open_groups_.empty()) {
-    CloseInvalidOpenGroups(TokenKind::Error, position);
-  }
-
   buffer_.AddToken({.kind = TokenKind::FileEnd,
                     .token_line = current_line(),
                     .column = ComputeColumn(position)});
+
+  // If we had any mismatched brackets, issue diagnostics and fix them.
+  if (has_mismatched_brackets_ || !open_groups_.empty()) {
+    DiagnoseAndFixMismatchedBrackets();
+  }
+}
+
+// A list of pending insertions to make into a tokenized buffer for error
+// recovery. These are buffered so that we can perform them in linear time.
+class Lexer::ErrorRecoveryBuffer {
+ public:
+  ErrorRecoveryBuffer(TokenizedBuffer& buffer) : buffer_(buffer) {}
+
+  auto empty() const -> bool {
+    return new_tokens_.empty() && !any_error_tokens_;
+  }
+
+  // Insert a recovery token of kind `kind` before `insert_before`. Note that we
+  // currently require insertions to be specified in source order, but this
+  // restriction would be easy to relax.
+  auto InsertBefore(TokenIndex insert_before, TokenKind kind) -> void {
+    CARBON_CHECK(insert_before.index > 0)
+        << "Cannot insert before the start of file token.";
+    CARBON_CHECK(new_tokens_.empty() ||
+                 new_tokens_.back().first <= insert_before)
+        << "Insertions performed out of order.";
+
+    // Find the end of the token before the target token, and add the new token
+    // there. Note that new_token_column is a 1-based column number.
+    auto insert_after = TokenIndex(insert_before.index - 1);
+    auto [new_token_line, new_token_column] =
+        buffer_.GetEndLocation(insert_after);
+    new_tokens_.push_back(
+        {insert_before,
+         {.kind = kind,
+          .has_trailing_space = buffer_.HasTrailingWhitespace(insert_after),
+          .is_recovery = true,
+          .token_line = new_token_line,
+          .column = new_token_column - 1}});
+  }
+
+  // Replace the given token with an error token. We do this immediately,
+  // because we don't benefit from buffering it.
+  auto ReplaceWithError(TokenIndex token) -> void {
+    auto& token_info = buffer_.GetTokenInfo(token);
+    token_info.error_length = buffer_.GetTokenText(token).size();
+    token_info.kind = TokenKind::Error;
+    any_error_tokens_ = true;
+  }
+
+  // Merge the recovery tokens into the token list of the tokenized buffer.
+  auto Apply() -> void {
+    auto old_tokens = std::move(buffer_.token_infos_);
+    buffer_.token_infos_.clear();
+    buffer_.token_infos_.reserve(old_tokens.size() + new_tokens_.size());
+
+    int old_tokens_offset = 0;
+    for (auto [next_offset, info] : new_tokens_) {
+      buffer_.token_infos_.append(old_tokens.begin() + old_tokens_offset,
+                                  old_tokens.begin() + next_offset.index);
+      buffer_.token_infos_.push_back(info);
+      old_tokens_offset = next_offset.index;
+    }
+    buffer_.token_infos_.append(old_tokens.begin() + old_tokens_offset,
+                                old_tokens.end());
+  }
+
+  // Perform bracket matching to fix cross-references between tokens. This must
+  // be done after all recovery is performed and all brackets match, because
+  // recovery will change token indexes.
+  auto FixTokenCrossReferences() -> void {
+    llvm::SmallVector<TokenIndex> open_groups;
+    for (auto token : buffer_.tokens()) {
+      auto kind = buffer_.GetKind(token);
+      if (kind.is_opening_symbol()) {
+        open_groups.push_back(token);
+      } else if (kind.is_closing_symbol()) {
+        CARBON_CHECK(!open_groups.empty()) << "Failed to balance brackets";
+        auto opening_token = open_groups.pop_back_val();
+
+        CARBON_CHECK(kind ==
+                     buffer_.GetTokenInfo(opening_token).kind.closing_symbol())
+            << "Failed to balance brackets";
+        auto& opening_token_info = buffer_.GetTokenInfo(opening_token);
+        auto& closing_token_info = buffer_.GetTokenInfo(token);
+        opening_token_info.closing_token = token;
+        closing_token_info.opening_token = opening_token;
+      }
+    }
+  }
+
+ private:
+  TokenizedBuffer& buffer_;
+
+  // A list of tokens to insert into the token stream to fix mismatched
+  // brackets. The first element in each pair is the original token index to
+  // insert the new token before.
+  llvm::SmallVector<std::pair<TokenIndex, TokenizedBuffer::TokenInfo>>
+      new_tokens_;
+
+  // Whether we have changed any tokens into error tokens.
+  bool any_error_tokens_ = false;
+};
+
+// Issue an UnmatchedOpening diagnostic.
+static auto DiagnoseUnmatchedOpening(TokenDiagnosticEmitter& emitter,
+                                     TokenIndex opening_token) -> void {
+  CARBON_DIAGNOSTIC(UnmatchedOpening, Error,
+                    "Opening symbol without a corresponding closing symbol.");
+  emitter.Emit(opening_token, UnmatchedOpening);
+}
+
+// If brackets didn't pair or nest properly, find a set of places to insert
+// brackets to fix the nesting, issue suitable diagnostics, and update the
+// token list to describe the fixes.
+auto Lexer::DiagnoseAndFixMismatchedBrackets() -> void {
+  ErrorRecoveryBuffer fixes(buffer_);
+
+  // Look for mismatched brackets and decide where to add tokens to fix them.
+  //
+  // TODO: For now, we use a greedy algorithm for this. We could do better by
+  // taking indentation into account. For example:
+  //
+  //     1  fn F() {
+  //     2    if (thing1)
+  //     3      thing2;
+  //     4    }
+  //     5  }
+  //
+  // Here, we'll match the `{` on line 1 with the `}` on line 4, and then
+  // report that the `}` on line 5 is unmatched. Instead, we should notice that
+  // line 1 matches better with line 5 due to indentation, and work out that
+  // the missing `{` was on line 2, also based on indentation.
+  open_groups_.clear();
+  for (auto token : buffer_.tokens()) {
+    auto kind = buffer_.GetKind(token);
+    if (kind.is_opening_symbol()) {
+      open_groups_.push_back(token);
+      continue;
+    }
+
+    if (!kind.is_closing_symbol()) {
+      continue;
+    }
+
+    // Find the innermost matching opening symbol.
+    auto opening_it = std::find_if(
+        open_groups_.rbegin(), open_groups_.rend(),
+        [&](TokenIndex opening_token) {
+          return buffer_.GetTokenInfo(opening_token).kind.closing_symbol() ==
+                 kind;
+        });
+    if (opening_it == open_groups_.rend()) {
+      CARBON_DIAGNOSTIC(
+          UnmatchedClosing, Error,
+          "Closing symbol without a corresponding opening symbol.");
+      token_emitter_.Emit(token, UnmatchedClosing);
+      fixes.ReplaceWithError(token);
+      continue;
+    }
+
+    // All intermediate open tokens have no matching close token.
+    for (auto it = open_groups_.rbegin(); it != opening_it; ++it) {
+      DiagnoseUnmatchedOpening(token_emitter_, *it);
+
+      // Add a closing bracket for the unclosed group here.
+      //
+      // TODO: Indicate in the diagnostic that we did this, perhaps by
+      // annotating the snippet.
+      auto opening_kind = buffer_.GetKind(*it);
+      fixes.InsertBefore(token, opening_kind.closing_symbol());
+    }
+
+    open_groups_.erase(opening_it.base() - 1, open_groups_.end());
+  }
+
+  // Diagnose any remaining unmatched opening symbols.
+  for (auto token : open_groups_) {
+    // We don't have a good location to insert a close bracket. Convert the
+    // opening token from a bracket to an error.
+    DiagnoseUnmatchedOpening(token_emitter_, token);
+    fixes.ReplaceWithError(token);
+  }
+
+  CARBON_CHECK(!fixes.empty()) << "Didn't find anything to fix";
+  fixes.Apply();
+  fixes.FixTokenCrossReferences();
 }
 
 auto Lex(SharedValueStores& value_stores, SourceBuffer& source,

+ 31 - 0
toolchain/lex/testdata/fail_mismatched_brackets.carbon

@@ -0,0 +1,31 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// AUTOUPDATE
+// CHECK:STDOUT: - filename: fail_mismatched_brackets.carbon
+// CHECK:STDOUT:   tokens: [
+// CHECK:STDOUT:     { index: 0, kind:  'FileStart', line: {{ *\d+}}, column:  1, indent: 1, spelling: '', has_trailing_space: true },
+
+// CHECK:STDERR: fail_mismatched_brackets.carbon:[[@LINE+3]]:1: ERROR: Closing symbol without a corresponding opening symbol.
+// CHECK:STDERR: }
+// CHECK:STDERR: ^
+}
+// CHECK:STDOUT:     { index: 1, kind:      'Error', line: {{ *}}[[@LINE-1]], column:  1, indent: 1, spelling: '}', has_trailing_space: true },
+
+// CHECK:STDERR: fail_mismatched_brackets.carbon:[[@LINE+3]]:3: ERROR: Closing symbol without a corresponding opening symbol.
+// CHECK:STDERR: ( } )
+// CHECK:STDERR:   ^
+( } )
+// CHECK:STDOUT:     { index: 2, kind:  'OpenParen', line: {{ *}}[[@LINE-1]], column:  1, indent: 1, spelling: '(', closing_token: 4, has_trailing_space: true },
+// CHECK:STDOUT:     { index: 3, kind:      'Error', line: {{ *}}[[@LINE-2]], column:  3, indent: 1, spelling: '}', has_trailing_space: true },
+// CHECK:STDOUT:     { index: 4, kind: 'CloseParen', line: {{ *}}[[@LINE-3]], column:  5, indent: 1, spelling: ')', opening_token: 2, has_trailing_space: true },
+
+// CHECK:STDERR: fail_mismatched_brackets.carbon:[[@LINE+3]]:1: ERROR: Opening symbol without a corresponding closing symbol.
+// CHECK:STDERR: [
+// CHECK:STDERR: ^
+[
+// CHECK:STDOUT:     { index: 5, kind:      'Error', line: {{ *}}[[@LINE-1]], column:  1, indent: 1, spelling: '[', has_trailing_space: true },
+
+// CHECK:STDOUT:     { index: 6, kind:    'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' },
+// CHECK:STDOUT:   ]

+ 38 - 0
toolchain/lex/testdata/fail_mismatched_brackets_2.carbon

@@ -0,0 +1,38 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// TODO: For this example, we should ideally report a missing `{` on the `if`
+// line, for example based on the indentation of the surrounding lines.
+
+fn F() {
+  if (thing1)
+    thing2;
+  }
+}
+
+// The check lines are inserted at the end so that they don't disrupt the
+// indentation of lines in the text.
+// AUTOUPDATE
+// CHECK:STDERR: fail_mismatched_brackets_2.carbon:[[@LINE-5]]:1: ERROR: Closing symbol without a corresponding opening symbol.
+// CHECK:STDERR: }
+// CHECK:STDERR: ^
+// CHECK:STDOUT: - filename: fail_mismatched_brackets_2.carbon
+// CHECK:STDOUT:   tokens: [
+// CHECK:STDOUT:     { index:  0, kind:       'FileStart', line: {{ *\d+}}, column:  1, indent: 1, spelling: '', has_trailing_space: true },
+// CHECK:STDOUT:     { index:  1, kind:              'Fn', line: {{ *}}[[@LINE-15]], column:  1, indent: 1, spelling: 'fn', has_trailing_space: true },
+// CHECK:STDOUT:     { index:  2, kind:      'Identifier', line: {{ *}}[[@LINE-16]], column:  4, indent: 1, spelling: 'F', identifier: 0 },
+// CHECK:STDOUT:     { index:  3, kind:       'OpenParen', line: {{ *}}[[@LINE-17]], column:  5, indent: 1, spelling: '(', closing_token: 4 },
+// CHECK:STDOUT:     { index:  4, kind:      'CloseParen', line: {{ *}}[[@LINE-18]], column:  6, indent: 1, spelling: ')', opening_token: 3, has_trailing_space: true },
+// CHECK:STDOUT:     { index:  5, kind:  'OpenCurlyBrace', line: {{ *}}[[@LINE-19]], column:  8, indent: 1, spelling: '{', closing_token: 12, has_trailing_space: true },
+// CHECK:STDOUT:     { index:  6, kind:              'If', line: {{ *}}[[@LINE-19]], column:  3, indent: 3, spelling: 'if', has_trailing_space: true },
+// CHECK:STDOUT:     { index:  7, kind:       'OpenParen', line: {{ *}}[[@LINE-20]], column:  6, indent: 3, spelling: '(', closing_token: 9 },
+// CHECK:STDOUT:     { index:  8, kind:      'Identifier', line: {{ *}}[[@LINE-21]], column:  7, indent: 3, spelling: 'thing1', identifier: 1 },
+// CHECK:STDOUT:     { index:  9, kind:      'CloseParen', line: {{ *}}[[@LINE-22]], column: 13, indent: 3, spelling: ')', opening_token: 7, has_trailing_space: true },
+// CHECK:STDOUT:     { index: 10, kind:      'Identifier', line: {{ *}}[[@LINE-22]], column:  5, indent: 5, spelling: 'thing2', identifier: 2 },
+// CHECK:STDOUT:     { index: 11, kind:            'Semi', line: {{ *}}[[@LINE-23]], column: 11, indent: 5, spelling: ';', has_trailing_space: true },
+// CHECK:STDOUT:     { index: 12, kind: 'CloseCurlyBrace', line: {{ *}}[[@LINE-23]], column:  3, indent: 3, spelling: '}', opening_token: 5, has_trailing_space: true },
+// CHECK:STDOUT:     { index: 13, kind:           'Error', line: {{ *}}[[@LINE-23]], column:  1, indent: 1, spelling: '}', has_trailing_space: true },
+
+// CHECK:STDOUT:     { index: 14, kind:         'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' },
+// CHECK:STDOUT:   ]

+ 19 - 0
toolchain/lex/tokenized_buffer.cpp

@@ -35,6 +35,25 @@ auto TokenizedBuffer::GetColumnNumber(TokenIndex token) const -> int {
   return GetTokenInfo(token).column + 1;
 }
 
+auto TokenizedBuffer::GetEndLocation(TokenIndex token) const
+    -> std::pair<LineIndex, int> {
+  auto line = GetLine(token);
+  int column = GetColumnNumber(token);
+  auto token_text = GetTokenText(token);
+
+  if (auto [before_newline, after_newline] = token_text.rsplit('\n');
+      before_newline.size() == token_text.size()) {
+    // Token fits on one line, advance the column number.
+    column += before_newline.size();
+  } else {
+    // Token contains newlines.
+    line.index += before_newline.count('\n') + 1;
+    column = 1 + after_newline.size();
+  }
+
+  return {line, column};
+}
+
 auto TokenizedBuffer::GetTokenText(TokenIndex token) const -> llvm::StringRef {
   const auto& token_info = GetTokenInfo(token);
   llvm::StringRef fixed_spelling = token_info.kind.fixed_spelling();

+ 4 - 0
toolchain/lex/tokenized_buffer.h

@@ -144,6 +144,10 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
   // Returns the 1-based column number.
   auto GetColumnNumber(TokenIndex token) const -> int;
 
+  // Returns the line and 1-based column number of the first character after
+  // this token.
+  auto GetEndLocation(TokenIndex token) const -> std::pair<LineIndex, int>;
+
   // Returns the source text lexed into this token.
   auto GetTokenText(TokenIndex token) const -> llvm::StringRef;
 

+ 7 - 13
toolchain/lex/tokenized_buffer_test.cpp

@@ -508,13 +508,11 @@ TEST_F(LexerTest, MatchingGroups) {
 TEST_F(LexerTest, MismatchedGroups) {
   auto buffer = Lex("{");
   EXPECT_TRUE(buffer.has_errors());
-  EXPECT_THAT(buffer,
-              HasTokens(llvm::ArrayRef<ExpectedToken>{
-                  {TokenKind::FileStart},
-                  {TokenKind::OpenCurlyBrace},
-                  {.kind = TokenKind::CloseCurlyBrace, .recovery = true},
-                  {TokenKind::FileEnd},
-              }));
+  EXPECT_THAT(buffer, HasTokens(llvm::ArrayRef<ExpectedToken>{
+                          {TokenKind::FileStart},
+                          {.kind = TokenKind::Error, .text = "{"},
+                          {TokenKind::FileEnd},
+                      }));
 
   buffer = Lex("}");
   EXPECT_TRUE(buffer.has_errors());
@@ -567,13 +565,9 @@ TEST_F(LexerTest, Whitespace) {
                   true,
                   // }
                   true,
-                  // {
+                  // error {
                   false,
-                  // (
-                  true,
-                  // inserted )
-                  true,
-                  // inserted }
+                  // error (
                   true,
                   // EOF
                   false};

+ 7 - 16
toolchain/parse/testdata/array/fail_require_close_bracket.carbon

@@ -5,31 +5,22 @@
 // AUTOUPDATE
 // TODO: It should emit only one error message.
 
-// CHECK:STDERR: fail_require_close_bracket.carbon:[[@LINE+9]]:8: ERROR: Closing symbol does not match most recent opening symbol.
+// CHECK:STDERR: fail_require_close_bracket.carbon:[[@LINE+6]]:8: ERROR: Opening symbol without a corresponding closing symbol.
 // CHECK:STDERR: var x: [i32;;
 // CHECK:STDERR:        ^
-// CHECK:STDERR: fail_require_close_bracket.carbon:[[@LINE+6]]:13: ERROR: Expected expression.
+// CHECK:STDERR: fail_require_close_bracket.carbon:[[@LINE+3]]:8: ERROR: Expected expression.
 // CHECK:STDERR: var x: [i32;;
-// CHECK:STDERR:             ^
-// CHECK:STDERR: fail_require_close_bracket.carbon:[[@LINE+3]]:13: ERROR: Unexpected tokens before `]`.
-// CHECK:STDERR: var x: [i32;;
-// CHECK:STDERR:             ^
+// CHECK:STDERR:        ^
 var x: [i32;;
 
-// CHECK:STDERR: fail_require_close_bracket.carbon:[[@LINE+16]]:21: ERROR: `var` declarations must end with a `;`.
-// CHECK:STDERR: // CHECK:STDOUT:   ]
-// CHECK:STDERR:                     ^
 // CHECK:STDOUT: - filename: fail_require_close_bracket.carbon
 // CHECK:STDOUT:   parse_tree: [
 // CHECK:STDOUT:     {kind: 'FileStart', text: ''},
 // CHECK:STDOUT:       {kind: 'VariableIntroducer', text: 'var'},
 // CHECK:STDOUT:         {kind: 'IdentifierName', text: 'x'},
-// CHECK:STDOUT:             {kind: 'ArrayExprStart', text: '['},
-// CHECK:STDOUT:             {kind: 'IntTypeLiteral', text: 'i32'},
-// CHECK:STDOUT:           {kind: 'ArrayExprSemi', text: ';', subtree_size: 3},
-// CHECK:STDOUT:           {kind: 'InvalidParse', text: ';', has_error: yes},
-// CHECK:STDOUT:         {kind: 'ArrayExpr', text: ']', has_error: yes, subtree_size: 5},
-// CHECK:STDOUT:       {kind: 'BindingPattern', text: ':', subtree_size: 7},
-// CHECK:STDOUT:     {kind: 'VariableDecl', text: ']', has_error: yes, subtree_size: 9},
+// CHECK:STDOUT:         {kind: 'InvalidParse', text: '[', has_error: yes},
+// CHECK:STDOUT:       {kind: 'BindingPattern', text: ':', has_error: yes, subtree_size: 3},
+// CHECK:STDOUT:     {kind: 'VariableDecl', text: ';', subtree_size: 5},
+// CHECK:STDOUT:     {kind: 'EmptyDecl', text: ';'},
 // CHECK:STDOUT:     {kind: 'FileEnd', text: ''},
 // CHECK:STDOUT:   ]

+ 7 - 16
toolchain/parse/testdata/array/fail_syntax.carbon

@@ -48,19 +48,13 @@ var y: [`];
 
 // --- no_close_bracket.carbon
 
-// CHECK:STDERR: no_close_bracket.carbon:[[@LINE+9]]:8: ERROR: Closing symbol does not match most recent opening symbol.
+// CHECK:STDERR: no_close_bracket.carbon:[[@LINE+6]]:8: ERROR: Opening symbol without a corresponding closing symbol.
 // CHECK:STDERR: var x: [i32;;
 // CHECK:STDERR:        ^
-// CHECK:STDERR: no_close_bracket.carbon:[[@LINE+6]]:13: ERROR: Expected expression.
+// CHECK:STDERR: no_close_bracket.carbon:[[@LINE+3]]:8: ERROR: Expected expression.
 // CHECK:STDERR: var x: [i32;;
-// CHECK:STDERR:             ^
-// CHECK:STDERR: no_close_bracket.carbon:[[@LINE+3]]:13: ERROR: Unexpected tokens before `]`.
-// CHECK:STDERR: var x: [i32;;
-// CHECK:STDERR:             ^
+// CHECK:STDERR:        ^
 var x: [i32;;
-// CHECK:STDERR: no_close_bracket.carbon:[[@LINE+3]]:1: ERROR: `var` declarations must end with a `;`.
-// CHECK:STDERR:
-// CHECK:STDERR: ^
 
 // --- no_semi.carbon
 
@@ -117,13 +111,10 @@ var x: [i32];
 // CHECK:STDOUT:     {kind: 'FileStart', text: ''},
 // CHECK:STDOUT:       {kind: 'VariableIntroducer', text: 'var'},
 // CHECK:STDOUT:         {kind: 'IdentifierName', text: 'x'},
-// CHECK:STDOUT:             {kind: 'ArrayExprStart', text: '['},
-// CHECK:STDOUT:             {kind: 'IntTypeLiteral', text: 'i32'},
-// CHECK:STDOUT:           {kind: 'ArrayExprSemi', text: ';', subtree_size: 3},
-// CHECK:STDOUT:           {kind: 'InvalidParse', text: ';', has_error: yes},
-// CHECK:STDOUT:         {kind: 'ArrayExpr', text: ']', has_error: yes, subtree_size: 5},
-// CHECK:STDOUT:       {kind: 'BindingPattern', text: ':', subtree_size: 7},
-// CHECK:STDOUT:     {kind: 'VariableDecl', text: ']', has_error: yes, subtree_size: 9},
+// CHECK:STDOUT:         {kind: 'InvalidParse', text: '[', has_error: yes},
+// CHECK:STDOUT:       {kind: 'BindingPattern', text: ':', has_error: yes, subtree_size: 3},
+// CHECK:STDOUT:     {kind: 'VariableDecl', text: ';', subtree_size: 5},
+// CHECK:STDOUT:     {kind: 'EmptyDecl', text: ';'},
 // CHECK:STDOUT:     {kind: 'FileEnd', text: ''},
 // CHECK:STDOUT:   ]
 // CHECK:STDOUT: - filename: no_semi.carbon

+ 4 - 12
toolchain/parse/testdata/function/declaration/fail_missing_implicit_close.carbon

@@ -5,27 +5,19 @@
 // AUTOUPDATE
 
 // Fix and uncomment this to test error handling.
-// CHECK:STDERR: fail_missing_implicit_close.carbon:[[@LINE+6]]:7: ERROR: Closing symbol does not match most recent opening symbol.
+// CHECK:STDERR: fail_missing_implicit_close.carbon:[[@LINE+6]]:7: ERROR: Opening symbol without a corresponding closing symbol.
 // CHECK:STDERR: fn Div[();
 // CHECK:STDERR:       ^
-// CHECK:STDERR: fail_missing_implicit_close.carbon:[[@LINE+3]]:8: ERROR: Expected binding pattern.
+// CHECK:STDERR: fail_missing_implicit_close.carbon:[[@LINE+3]]:7: ERROR: `fn` requires a `(` for parameters.
 // CHECK:STDERR: fn Div[();
-// CHECK:STDERR:        ^
+// CHECK:STDERR:       ^
 fn Div[();
 
-// CHECK:STDERR: fail_missing_implicit_close.carbon:[[@LINE+15]]:21: ERROR: A `(` for parameters is required after implicit parameters.
-// CHECK:STDERR: // CHECK:STDOUT:   ]
-// CHECK:STDERR:                     ^
 // CHECK:STDOUT: - filename: fail_missing_implicit_close.carbon
 // CHECK:STDOUT:   parse_tree: [
 // CHECK:STDOUT:     {kind: 'FileStart', text: ''},
 // CHECK:STDOUT:       {kind: 'FunctionIntroducer', text: 'fn'},
 // CHECK:STDOUT:       {kind: 'IdentifierName', text: 'Div'},
-// CHECK:STDOUT:         {kind: 'ImplicitParamListStart', text: '['},
-// CHECK:STDOUT:           {kind: 'IdentifierName', text: '(', has_error: yes},
-// CHECK:STDOUT:           {kind: 'InvalidParse', text: '(', has_error: yes},
-// CHECK:STDOUT:         {kind: 'BindingPattern', text: '(', has_error: yes, subtree_size: 3},
-// CHECK:STDOUT:       {kind: 'ImplicitParamList', text: ']', has_error: yes, subtree_size: 5},
-// CHECK:STDOUT:     {kind: 'FunctionDecl', text: 'fn', has_error: yes, subtree_size: 8},
+// CHECK:STDOUT:     {kind: 'FunctionDecl', text: ';', has_error: yes, subtree_size: 3},
 // CHECK:STDOUT:     {kind: 'FileEnd', text: ''},
 // CHECK:STDOUT:   ]