Sfoglia il codice sorgente

Lexer style cleanup (#1002)

Jon Meow 4 anni fa
parent
commit
fd489b69ad
1 ha cambiato i file con 137 aggiunte e 134 eliminazioni
  1. 137 134
      toolchain/lexer/tokenized_buffer.cpp

+ 137 - 134
toolchain/lexer/tokenized_buffer.cpp

@@ -86,32 +86,7 @@ auto VariantMatch(V&& v, Fs&&... fs) -> decltype(auto) {
 // the different lexing steps that may be used. It directly updates the provided
 // the different lexing steps that may be used. It directly updates the provided
 // tokenized buffer with the lexed tokens.
 // tokenized buffer with the lexed tokens.
 class TokenizedBuffer::Lexer {
 class TokenizedBuffer::Lexer {
-  TokenizedBuffer& buffer;
-
-  SourceBufferLocationTranslator translator;
-  LexerDiagnosticEmitter emitter;
-
-  TokenLocationTranslator token_translator;
-  TokenDiagnosticEmitter token_emitter;
-
-  Line current_line;
-  LineInfo* current_line_info;
-
-  int current_column = 0;
-  bool set_indent = false;
-
-  llvm::SmallVector<Token, 8> open_groups;
-
  public:
  public:
-  Lexer(TokenizedBuffer& buffer, DiagnosticConsumer& consumer)
-      : buffer(buffer),
-        translator(buffer),
-        emitter(translator, consumer),
-        token_translator(buffer),
-        token_emitter(token_translator, consumer),
-        current_line(buffer.AddLine({0, 0, 0})),
-        current_line_info(&buffer.GetLineInfo(current_line)) {}
-
   // Symbolic result of a lexing action. This indicates whether we successfully
   // Symbolic result of a lexing action. This indicates whether we successfully
   // lexed a token, or whether other lexing actions should be attempted.
   // lexed a token, or whether other lexing actions should be attempted.
   //
   //
@@ -119,9 +94,6 @@ class TokenizedBuffer::Lexer {
   // more self documenting, and by consuming the actual token constructively
   // more self documenting, and by consuming the actual token constructively
   // when one is produced, it helps ensure the correct result is returned.
   // when one is produced, it helps ensure the correct result is returned.
   class LexResult {
   class LexResult {
-    bool formed_token;
-    explicit LexResult(bool formed_token) : formed_token(formed_token) {}
-
    public:
    public:
     // Consumes (and discard) a valid token to construct a result
     // Consumes (and discard) a valid token to construct a result
     // indicating a token has been produced. Relies on implicit conversions.
     // indicating a token has been produced. Relies on implicit conversions.
@@ -133,24 +105,38 @@ class TokenizedBuffer::Lexer {
 
 
     // Tests whether a token was produced by the lexing routine, and
     // Tests whether a token was produced by the lexing routine, and
     // the lexer can continue forming tokens.
     // the lexer can continue forming tokens.
-    explicit operator bool() const { return formed_token; }
+    explicit operator bool() const { return formed_token_; }
+
+   private:
+    explicit LexResult(bool formed_token) : formed_token_(formed_token) {}
+
+    bool formed_token_;
   };
   };
 
 
+  Lexer(TokenizedBuffer& buffer, DiagnosticConsumer& consumer)
+      : buffer_(buffer),
+        translator_(buffer),
+        emitter_(translator_, consumer),
+        token_translator_(buffer),
+        token_emitter_(token_translator_, consumer),
+        current_line_(buffer.AddLine({0, 0, 0})),
+        current_line_info_(&buffer.GetLineInfo(current_line_)) {}
+
   // Perform the necessary bookkeeping to step past a newline at the current
   // Perform the necessary bookkeeping to step past a newline at the current
   // line and column.
   // line and column.
   auto HandleNewline() -> void {
   auto HandleNewline() -> void {
-    current_line_info->length = current_column;
+    current_line_info_->length = current_column_;
 
 
-    current_line =
-        buffer.AddLine({current_line_info->start + current_column + 1, 0, 0});
-    current_line_info = &buffer.GetLineInfo(current_line);
-    current_column = 0;
-    set_indent = false;
+    current_line_ = buffer_.AddLine(
+        {current_line_info_->start + current_column_ + 1, 0, 0});
+    current_line_info_ = &buffer_.GetLineInfo(current_line_);
+    current_column_ = 0;
+    set_indent_ = false;
   }
   }
 
 
   auto NoteWhitespace() -> void {
   auto NoteWhitespace() -> void {
-    if (!buffer.token_infos_.empty()) {
-      buffer.token_infos_.back().has_trailing_space = true;
+    if (!buffer_.token_infos_.empty()) {
+      buffer_.token_infos_.back().has_trailing_space = true;
     }
     }
   }
   }
 
 
@@ -162,16 +148,16 @@ class TokenizedBuffer::Lexer {
       // were whitespace.
       // were whitespace.
       if (source_text.startswith("//")) {
       if (source_text.startswith("//")) {
         // Any comment must be the only non-whitespace on the line.
         // Any comment must be the only non-whitespace on the line.
-        if (set_indent) {
-          emitter.EmitError<TrailingComment>(source_text.begin());
+        if (set_indent_) {
+          emitter_.EmitError<TrailingComment>(source_text.begin());
         }
         }
         // The introducer '//' must be followed by whitespace or EOF.
         // The introducer '//' must be followed by whitespace or EOF.
         if (source_text.size() > 2 && !IsSpace(source_text[2])) {
         if (source_text.size() > 2 && !IsSpace(source_text[2])) {
-          emitter.EmitError<NoWhitespaceAfterCommentIntroducer>(
+          emitter_.EmitError<NoWhitespaceAfterCommentIntroducer>(
               source_text.begin() + 2);
               source_text.begin() + 2);
         }
         }
         while (!source_text.empty() && source_text.front() != '\n') {
         while (!source_text.empty() && source_text.front() != '\n') {
-          ++current_column;
+          ++current_column_;
           source_text = source_text.drop_front();
           source_text = source_text.drop_front();
         }
         }
         if (source_text.empty()) {
         if (source_text.empty()) {
@@ -194,7 +180,7 @@ class TokenizedBuffer::Lexer {
           // to avoid creating an empty line.
           // to avoid creating an empty line.
           source_text = source_text.drop_front();
           source_text = source_text.drop_front();
           if (source_text.empty()) {
           if (source_text.empty()) {
-            current_line_info->length = current_column;
+            current_line_info_->length = current_column_;
             return false;
             return false;
           }
           }
 
 
@@ -209,7 +195,7 @@ class TokenizedBuffer::Lexer {
           // whitespace as well as special handling to allow better tokenization
           // whitespace as well as special handling to allow better tokenization
           // of operators. This is just a stub to check that our column
           // of operators. This is just a stub to check that our column
           // management works.
           // management works.
-          ++current_column;
+          ++current_column_;
           source_text = source_text.drop_front();
           source_text = source_text.drop_front();
           continue;
           continue;
       }
       }
@@ -217,7 +203,7 @@ class TokenizedBuffer::Lexer {
 
 
     CHECK(source_text.empty()) << "Cannot reach here w/o finishing the text!";
     CHECK(source_text.empty()) << "Cannot reach here w/o finishing the text!";
     // Update the line length as this is also the end of a line.
     // Update the line length as this is also the end of a line.
-    current_line_info->length = current_column;
+    current_line_info_->length = current_column_;
     return false;
     return false;
   }
   }
 
 
@@ -228,43 +214,43 @@ class TokenizedBuffer::Lexer {
       return LexResult::NoMatch();
       return LexResult::NoMatch();
     }
     }
 
 
-    int int_column = current_column;
+    int int_column = current_column_;
     int token_size = literal->Text().size();
     int token_size = literal->Text().size();
-    current_column += token_size;
+    current_column_ += token_size;
     source_text = source_text.drop_front(token_size);
     source_text = source_text.drop_front(token_size);
 
 
-    if (!set_indent) {
-      current_line_info->indent = int_column;
-      set_indent = true;
+    if (!set_indent_) {
+      current_line_info_->indent = int_column;
+      set_indent_ = true;
     }
     }
 
 
     return VariantMatch(
     return VariantMatch(
-        literal->ComputeValue(emitter),
+        literal->ComputeValue(emitter_),
         [&](LexedNumericLiteral::IntegerValue&& value) {
         [&](LexedNumericLiteral::IntegerValue&& value) {
-          auto token = buffer.AddToken({.kind = TokenKind::IntegerLiteral(),
-                                        .token_line = current_line,
-                                        .column = int_column});
-          buffer.GetTokenInfo(token).literal_index =
-              buffer.literal_int_storage_.size();
-          buffer.literal_int_storage_.push_back(std::move(value.value));
+          auto token = buffer_.AddToken({.kind = TokenKind::IntegerLiteral(),
+                                         .token_line = current_line_,
+                                         .column = int_column});
+          buffer_.GetTokenInfo(token).literal_index =
+              buffer_.literal_int_storage_.size();
+          buffer_.literal_int_storage_.push_back(std::move(value.value));
           return token;
           return token;
         },
         },
         [&](LexedNumericLiteral::RealValue&& value) {
         [&](LexedNumericLiteral::RealValue&& value) {
-          auto token = buffer.AddToken({.kind = TokenKind::RealLiteral(),
-                                        .token_line = current_line,
-                                        .column = int_column});
-          buffer.GetTokenInfo(token).literal_index =
-              buffer.literal_int_storage_.size();
-          buffer.literal_int_storage_.push_back(std::move(value.mantissa));
-          buffer.literal_int_storage_.push_back(std::move(value.exponent));
-          assert(buffer.GetRealLiteral(token).IsDecimal() ==
+          auto token = buffer_.AddToken({.kind = TokenKind::RealLiteral(),
+                                         .token_line = current_line_,
+                                         .column = int_column});
+          buffer_.GetTokenInfo(token).literal_index =
+              buffer_.literal_int_storage_.size();
+          buffer_.literal_int_storage_.push_back(std::move(value.mantissa));
+          buffer_.literal_int_storage_.push_back(std::move(value.exponent));
+          assert(buffer_.GetRealLiteral(token).IsDecimal() ==
                  (value.radix == 10));
                  (value.radix == 10));
           return token;
           return token;
         },
         },
         [&](LexedNumericLiteral::UnrecoverableError) {
         [&](LexedNumericLiteral::UnrecoverableError) {
-          auto token = buffer.AddToken({
+          auto token = buffer_.AddToken({
               .kind = TokenKind::Error(),
               .kind = TokenKind::Error(),
-              .token_line = current_line,
+              .token_line = current_line_,
               .column = int_column,
               .column = int_column,
               .error_length = token_size,
               .error_length = token_size,
           });
           });
@@ -279,39 +265,39 @@ class TokenizedBuffer::Lexer {
       return LexResult::NoMatch();
       return LexResult::NoMatch();
     }
     }
 
 
-    Line string_line = current_line;
-    int string_column = current_column;
+    Line string_line = current_line_;
+    int string_column = current_column_;
     int literal_size = literal->Text().size();
     int literal_size = literal->Text().size();
     source_text = source_text.drop_front(literal_size);
     source_text = source_text.drop_front(literal_size);
 
 
-    if (!set_indent) {
-      current_line_info->indent = string_column;
-      set_indent = true;
+    if (!set_indent_) {
+      current_line_info_->indent = string_column;
+      set_indent_ = true;
     }
     }
 
 
     // Update line and column information.
     // Update line and column information.
     if (!literal->IsMultiLine()) {
     if (!literal->IsMultiLine()) {
-      current_column += literal_size;
+      current_column_ += literal_size;
     } else {
     } else {
       for (char c : literal->Text()) {
       for (char c : literal->Text()) {
         if (c == '\n') {
         if (c == '\n') {
           HandleNewline();
           HandleNewline();
           // The indentation of all lines in a multi-line string literal is
           // The indentation of all lines in a multi-line string literal is
           // that of the first line.
           // that of the first line.
-          current_line_info->indent = string_column;
-          set_indent = true;
+          current_line_info_->indent = string_column;
+          set_indent_ = true;
         } else {
         } else {
-          ++current_column;
+          ++current_column_;
         }
         }
       }
       }
     }
     }
 
 
-    auto token = buffer.AddToken({.kind = TokenKind::StringLiteral(),
-                                  .token_line = string_line,
-                                  .column = string_column});
-    buffer.GetTokenInfo(token).literal_index =
-        buffer.literal_string_storage_.size();
-    buffer.literal_string_storage_.push_back(literal->ComputeValue(emitter));
+    auto token = buffer_.AddToken({.kind = TokenKind::StringLiteral(),
+                                   .token_line = string_line,
+                                   .column = string_column});
+    buffer_.GetTokenInfo(token).literal_index =
+        buffer_.literal_string_storage_.size();
+    buffer_.literal_string_storage_.push_back(literal->ComputeValue(emitter_));
     return token;
     return token;
   }
   }
 
 
@@ -325,22 +311,22 @@ class TokenizedBuffer::Lexer {
       return LexResult::NoMatch();
       return LexResult::NoMatch();
     }
     }
 
 
-    if (!set_indent) {
-      current_line_info->indent = current_column;
-      set_indent = true;
+    if (!set_indent_) {
+      current_line_info_->indent = current_column_;
+      set_indent_ = true;
     }
     }
 
 
     CloseInvalidOpenGroups(kind);
     CloseInvalidOpenGroups(kind);
 
 
     const char* location = source_text.begin();
     const char* location = source_text.begin();
-    Token token = buffer.AddToken(
-        {.kind = kind, .token_line = current_line, .column = current_column});
-    current_column += kind.GetFixedSpelling().size();
+    Token token = buffer_.AddToken(
+        {.kind = kind, .token_line = current_line_, .column = current_column_});
+    current_column_ += kind.GetFixedSpelling().size();
     source_text = source_text.drop_front(kind.GetFixedSpelling().size());
     source_text = source_text.drop_front(kind.GetFixedSpelling().size());
 
 
     // Opening symbols just need to be pushed onto our queue of opening groups.
     // Opening symbols just need to be pushed onto our queue of opening groups.
     if (kind.IsOpeningSymbol()) {
     if (kind.IsOpeningSymbol()) {
-      open_groups.push_back(token);
+      open_groups_.push_back(token);
       return token;
       return token;
     }
     }
 
 
@@ -349,22 +335,22 @@ class TokenizedBuffer::Lexer {
       return token;
       return token;
     }
     }
 
 
-    TokenInfo& closing_token_info = buffer.GetTokenInfo(token);
+    TokenInfo& closing_token_info = buffer_.GetTokenInfo(token);
 
 
     // Check that there is a matching opening symbol before we consume this as
     // Check that there is a matching opening symbol before we consume this as
     // a closing symbol.
     // a closing symbol.
-    if (open_groups.empty()) {
+    if (open_groups_.empty()) {
       closing_token_info.kind = TokenKind::Error();
       closing_token_info.kind = TokenKind::Error();
       closing_token_info.error_length = kind.GetFixedSpelling().size();
       closing_token_info.error_length = kind.GetFixedSpelling().size();
 
 
-      emitter.EmitError<UnmatchedClosing>(location);
+      emitter_.EmitError<UnmatchedClosing>(location);
       // Note that this still returns true as we do consume a symbol.
       // Note that this still returns true as we do consume a symbol.
       return token;
       return token;
     }
     }
 
 
     // Finally can handle a normal closing symbol.
     // Finally can handle a normal closing symbol.
-    Token opening_token = open_groups.pop_back_val();
-    TokenInfo& opening_token_info = buffer.GetTokenInfo(opening_token);
+    Token opening_token = open_groups_.pop_back_val();
+    TokenInfo& opening_token_info = buffer_.GetTokenInfo(opening_token);
     opening_token_info.closing_token = token;
     opening_token_info.closing_token = token;
     closing_token_info.opening_token = opening_token;
     closing_token_info.opening_token = opening_token;
     return token;
     return token;
@@ -404,11 +390,11 @@ class TokenizedBuffer::Lexer {
       return LexResult::NoMatch();
       return LexResult::NoMatch();
     }
     }
 
 
-    auto token = buffer.AddToken(
-        {.kind = *kind, .token_line = current_line, .column = column});
-    buffer.GetTokenInfo(token).literal_index =
-        buffer.literal_int_storage_.size();
-    buffer.literal_int_storage_.push_back(std::move(suffix_value));
+    auto token = buffer_.AddToken(
+        {.kind = *kind, .token_line = current_line_, .column = column});
+    buffer_.GetTokenInfo(token).literal_index =
+        buffer_.literal_int_storage_.size();
+    buffer_.literal_int_storage_.push_back(std::move(suffix_value));
     return token;
     return token;
   }
   }
 
 
@@ -419,39 +405,39 @@ class TokenizedBuffer::Lexer {
       return;
       return;
     }
     }
 
 
-    while (!open_groups.empty()) {
-      Token opening_token = open_groups.back();
-      TokenKind opening_kind = buffer.GetTokenInfo(opening_token).kind;
+    while (!open_groups_.empty()) {
+      Token opening_token = open_groups_.back();
+      TokenKind opening_kind = buffer_.GetTokenInfo(opening_token).kind;
       if (kind == opening_kind.GetClosingSymbol()) {
       if (kind == opening_kind.GetClosingSymbol()) {
         return;
         return;
       }
       }
 
 
-      open_groups.pop_back();
-      token_emitter.EmitError<MismatchedClosing>(opening_token);
+      open_groups_.pop_back();
+      token_emitter_.EmitError<MismatchedClosing>(opening_token);
 
 
-      CHECK(!buffer.Tokens().empty()) << "Must have a prior opening token!";
-      Token prev_token = buffer.Tokens().end()[-1];
+      CHECK(!buffer_.Tokens().empty()) << "Must have a prior opening token!";
+      Token prev_token = buffer_.Tokens().end()[-1];
 
 
       // TODO: do a smarter backwards scan for where to put the closing
       // TODO: do a smarter backwards scan for where to put the closing
       // token.
       // token.
-      Token closing_token = buffer.AddToken(
+      Token closing_token = buffer_.AddToken(
           {.kind = opening_kind.GetClosingSymbol(),
           {.kind = opening_kind.GetClosingSymbol(),
-           .has_trailing_space = buffer.HasTrailingWhitespace(prev_token),
+           .has_trailing_space = buffer_.HasTrailingWhitespace(prev_token),
            .is_recovery = true,
            .is_recovery = true,
-           .token_line = current_line,
-           .column = current_column});
-      TokenInfo& opening_token_info = buffer.GetTokenInfo(opening_token);
-      TokenInfo& closing_token_info = buffer.GetTokenInfo(closing_token);
+           .token_line = current_line_,
+           .column = current_column_});
+      TokenInfo& opening_token_info = buffer_.GetTokenInfo(opening_token);
+      TokenInfo& closing_token_info = buffer_.GetTokenInfo(closing_token);
       opening_token_info.closing_token = closing_token;
       opening_token_info.closing_token = closing_token;
       closing_token_info.opening_token = opening_token;
       closing_token_info.opening_token = opening_token;
     }
     }
   }
   }
 
 
   auto GetOrCreateIdentifier(llvm::StringRef text) -> Identifier {
   auto GetOrCreateIdentifier(llvm::StringRef text) -> Identifier {
-    auto insert_result = buffer.identifier_map_.insert(
-        {text, Identifier(buffer.identifier_infos_.size())});
+    auto insert_result = buffer_.identifier_map_.insert(
+        {text, Identifier(buffer_.identifier_infos_.size())});
     if (insert_result.second) {
     if (insert_result.second) {
-      buffer.identifier_infos_.push_back({text});
+      buffer_.identifier_infos_.push_back({text});
     }
     }
     return insert_result.first->second;
     return insert_result.first->second;
   }
   }
@@ -461,17 +447,17 @@ class TokenizedBuffer::Lexer {
       return LexResult::NoMatch();
       return LexResult::NoMatch();
     }
     }
 
 
-    if (!set_indent) {
-      current_line_info->indent = current_column;
-      set_indent = true;
+    if (!set_indent_) {
+      current_line_info_->indent = current_column_;
+      set_indent_ = true;
     }
     }
 
 
     // Take the valid characters off the front of the source buffer.
     // Take the valid characters off the front of the source buffer.
     llvm::StringRef identifier_text =
     llvm::StringRef identifier_text =
         source_text.take_while([](char c) { return IsAlnum(c) || c == '_'; });
         source_text.take_while([](char c) { return IsAlnum(c) || c == '_'; });
     CHECK(!identifier_text.empty()) << "Must have at least one character!";
     CHECK(!identifier_text.empty()) << "Must have at least one character!";
-    int identifier_column = current_column;
-    current_column += identifier_text.size();
+    int identifier_column = current_column_;
+    current_column_ += identifier_text.size();
     source_text = source_text.drop_front(identifier_text.size());
     source_text = source_text.drop_front(identifier_text.size());
 
 
     // Check if the text is a type literal, and if so form such a literal.
     // Check if the text is a type literal, and if so form such a literal.
@@ -486,16 +472,16 @@ class TokenizedBuffer::Lexer {
 #include "toolchain/lexer/token_registry.def"
 #include "toolchain/lexer/token_registry.def"
                          .Default(TokenKind::Error());
                          .Default(TokenKind::Error());
     if (kind != TokenKind::Error()) {
     if (kind != TokenKind::Error()) {
-      return buffer.AddToken({.kind = kind,
-                              .token_line = current_line,
-                              .column = identifier_column});
+      return buffer_.AddToken({.kind = kind,
+                               .token_line = current_line_,
+                               .column = identifier_column});
     }
     }
 
 
     // Otherwise we have a generic identifier.
     // Otherwise we have a generic identifier.
-    return buffer.AddToken({.kind = TokenKind::Identifier(),
-                            .token_line = current_line,
-                            .column = identifier_column,
-                            .id = GetOrCreateIdentifier(identifier_text)});
+    return buffer_.AddToken({.kind = TokenKind::Identifier(),
+                             .token_line = current_line_,
+                             .column = identifier_column,
+                             .id = GetOrCreateIdentifier(identifier_text)});
   }
   }
 
 
   auto LexError(llvm::StringRef& source_text) -> LexResult {
   auto LexError(llvm::StringRef& source_text) -> LexResult {
@@ -522,23 +508,40 @@ class TokenizedBuffer::Lexer {
 
 
     // Longer errors get to be two tokens.
     // Longer errors get to be two tokens.
     error_text = error_text.substr(0, std::numeric_limits<int32_t>::max());
     error_text = error_text.substr(0, std::numeric_limits<int32_t>::max());
-    auto token = buffer.AddToken(
+    auto token = buffer_.AddToken(
         {.kind = TokenKind::Error(),
         {.kind = TokenKind::Error(),
-         .token_line = current_line,
-         .column = current_column,
+         .token_line = current_line_,
+         .column = current_column_,
          .error_length = static_cast<int32_t>(error_text.size())});
          .error_length = static_cast<int32_t>(error_text.size())});
-    emitter.EmitError<UnrecognizedCharacters>(error_text.begin());
+    emitter_.EmitError<UnrecognizedCharacters>(error_text.begin());
 
 
-    current_column += error_text.size();
+    current_column_ += error_text.size();
     source_text = source_text.drop_front(error_text.size());
     source_text = source_text.drop_front(error_text.size());
     return token;
     return token;
   }
   }
 
 
   auto AddEndOfFileToken() -> void {
   auto AddEndOfFileToken() -> void {
-    buffer.AddToken({.kind = TokenKind::EndOfFile(),
-                     .token_line = current_line,
-                     .column = current_column});
+    buffer_.AddToken({.kind = TokenKind::EndOfFile(),
+                      .token_line = current_line_,
+                      .column = current_column_});
   }
   }
+
+ private:
+  TokenizedBuffer& buffer_;
+
+  SourceBufferLocationTranslator translator_;
+  LexerDiagnosticEmitter emitter_;
+
+  TokenLocationTranslator token_translator_;
+  TokenDiagnosticEmitter token_emitter_;
+
+  Line current_line_;
+  LineInfo* current_line_info_;
+
+  int current_column_ = 0;
+  bool set_indent_ = false;
+
+  llvm::SmallVector<Token, 8> open_groups_;
 };
 };
 
 
 auto TokenizedBuffer::Lex(SourceBuffer& source, DiagnosticConsumer& consumer)
 auto TokenizedBuffer::Lex(SourceBuffer& source, DiagnosticConsumer& consumer)