Quellcode durchsuchen

Avoid unnecessary relexing of the last line (#999)

Co-authored-by: Richard Smith <richard@metafoo.co.uk>
Jon Meow vor 4 Jahren
Ursprung
Commit
22721da92b

+ 13 - 10
toolchain/lexer/tokenized_buffer.cpp

@@ -115,9 +115,9 @@ class TokenizedBuffer::Lexer {
 
   Lexer(TokenizedBuffer& buffer, DiagnosticConsumer& consumer)
       : buffer_(buffer),
-        translator_(buffer),
+        translator_(buffer, &current_column_),
         emitter_(translator_, consumer),
-        token_translator_(buffer),
+        token_translator_(buffer, &current_column_),
         token_emitter_(token_translator_, consumer),
         current_line_(buffer.AddLine({0, 0, 0})),
         current_line_info_(&buffer.GetLineInfo(current_line_)) {}
@@ -883,11 +883,12 @@ auto TokenizedBuffer::SourceBufferLocationTranslator::GetLocation(
 
   // Find the first line starting after the given location. Note that we can't
   // inspect `line.length` here because it is not necessarily correct for the
-  // final line.
+  // final line during lexing (but will be correct later for the parse tree).
   auto line_it = std::partition_point(
       buffer_->line_infos_.begin(), buffer_->line_infos_.end(),
       [offset](const LineInfo& line) { return line.start <= offset; });
-  bool incomplete_line_info = line_it == buffer_->line_infos_.end();
+  bool incomplete_line_info = last_line_lexed_to_column_ != nullptr &&
+                              line_it == buffer_->line_infos_.end();
 
   // Step back one line to find the line containing the given position.
   CHECK(line_it != buffer_->line_infos_.begin())
@@ -897,11 +898,12 @@ auto TokenizedBuffer::SourceBufferLocationTranslator::GetLocation(
   int column_number = offset - line_it->start;
 
   // We might still be lexing the last line. If so, check to see if there are
-  // any newline characters between the start of this line and the given
-  // location.
-  if (incomplete_line_info) {
-    column_number = 0;
-    for (int64_t i = line_it->start; i != offset; ++i) {
+  // any newline characters between the position we've finished lexing up to
+  // and the given location.
+  if (incomplete_line_info && column_number > *last_line_lexed_to_column_) {
+    column_number = *last_line_lexed_to_column_;
+    for (int64_t i = line_it->start + *last_line_lexed_to_column_; i != offset;
+         ++i) {
       if (buffer_->source_->Text()[i] == '\n') {
         ++line_number;
         column_number = 0;
@@ -927,7 +929,8 @@ auto TokenizedBuffer::TokenLocationTranslator::GetLocation(Token token)
   // Find the corresponding file location.
   // TODO: Should we somehow indicate in the diagnostic location if this token
   // is a recovery token that doesn't correspond to the original source?
-  return SourceBufferLocationTranslator(*buffer_).GetLocation(token_start);
+  return SourceBufferLocationTranslator(*buffer_, last_line_lexed_to_column_)
+      .GetLocation(token_start);
 }
 
 }  // namespace Carbon

+ 13 - 4
toolchain/lexer/tokenized_buffer.h

@@ -246,14 +246,18 @@ class TokenizedBuffer {
   class TokenLocationTranslator
       : public DiagnosticLocationTranslator<Internal::TokenizedBufferToken> {
    public:
-    explicit TokenLocationTranslator(TokenizedBuffer& buffer)
-        : buffer_(&buffer) {}
+    explicit TokenLocationTranslator(TokenizedBuffer& buffer,
+                                     int* last_line_lexed_to_column)
+        : buffer_(&buffer),
+          last_line_lexed_to_column_(last_line_lexed_to_column) {}
 
     // Map the given token into a diagnostic location.
     auto GetLocation(Token token) -> Diagnostic::Location override;
 
    private:
     TokenizedBuffer* buffer_;
+    // Passed to SourceBufferLocationTranslator.
+    int* last_line_lexed_to_column_;
   };
 
   // Lexes a buffer of source code into a tokenized buffer.
@@ -366,8 +370,10 @@ class TokenizedBuffer {
   class SourceBufferLocationTranslator
       : public DiagnosticLocationTranslator<const char*> {
    public:
-    explicit SourceBufferLocationTranslator(TokenizedBuffer& buffer)
-        : buffer_(&buffer) {}
+    explicit SourceBufferLocationTranslator(TokenizedBuffer& buffer,
+                                            int* last_line_lexed_to_column)
+        : buffer_(&buffer),
+          last_line_lexed_to_column_(last_line_lexed_to_column) {}
 
     // Map the given position within the source buffer into a diagnostic
     // location.
@@ -375,6 +381,9 @@ class TokenizedBuffer {
 
    private:
     TokenizedBuffer* buffer_;
+    // Points to the column the last line has been lexed up to, used to decide
+    // whether to scan the last line for unlexed newlines. May be null after
+    // lexing is complete, in which case that scan is skipped.
   };
 
   // Specifies minimum widths to use when printing a token's fields via

BIN
toolchain/parser/fuzzer_corpus/7fb2ef54332fca52e531ccaf4d3d4a8a9d664634


+ 2 - 1
toolchain/parser/parse_tree.cpp

@@ -22,7 +22,8 @@ namespace Carbon {
 
 auto ParseTree::Parse(TokenizedBuffer& tokens, DiagnosticConsumer& consumer)
     -> ParseTree {
-  TokenizedBuffer::TokenLocationTranslator translator(tokens);
+  TokenizedBuffer::TokenLocationTranslator translator(
+      tokens, /*last_line_lexed_to_column=*/nullptr);
   TokenDiagnosticEmitter emitter(translator, consumer);
 
   // Delegate to the parser.