|
@@ -86,32 +86,7 @@ auto VariantMatch(V&& v, Fs&&... fs) -> decltype(auto) {
|
|
|
// the different lexing steps that may be used. It directly updates the provided
|
|
// the different lexing steps that may be used. It directly updates the provided
|
|
|
// tokenized buffer with the lexed tokens.
|
|
// tokenized buffer with the lexed tokens.
|
|
|
class TokenizedBuffer::Lexer {
|
|
class TokenizedBuffer::Lexer {
|
|
|
- TokenizedBuffer& buffer;
|
|
|
|
|
-
|
|
|
|
|
- SourceBufferLocationTranslator translator;
|
|
|
|
|
- LexerDiagnosticEmitter emitter;
|
|
|
|
|
-
|
|
|
|
|
- TokenLocationTranslator token_translator;
|
|
|
|
|
- TokenDiagnosticEmitter token_emitter;
|
|
|
|
|
-
|
|
|
|
|
- Line current_line;
|
|
|
|
|
- LineInfo* current_line_info;
|
|
|
|
|
-
|
|
|
|
|
- int current_column = 0;
|
|
|
|
|
- bool set_indent = false;
|
|
|
|
|
-
|
|
|
|
|
- llvm::SmallVector<Token, 8> open_groups;
|
|
|
|
|
-
|
|
|
|
|
public:
|
|
public:
|
|
|
- Lexer(TokenizedBuffer& buffer, DiagnosticConsumer& consumer)
|
|
|
|
|
- : buffer(buffer),
|
|
|
|
|
- translator(buffer),
|
|
|
|
|
- emitter(translator, consumer),
|
|
|
|
|
- token_translator(buffer),
|
|
|
|
|
- token_emitter(token_translator, consumer),
|
|
|
|
|
- current_line(buffer.AddLine({0, 0, 0})),
|
|
|
|
|
- current_line_info(&buffer.GetLineInfo(current_line)) {}
|
|
|
|
|
-
|
|
|
|
|
// Symbolic result of a lexing action. This indicates whether we successfully
|
|
// Symbolic result of a lexing action. This indicates whether we successfully
|
|
|
// lexed a token, or whether other lexing actions should be attempted.
|
|
// lexed a token, or whether other lexing actions should be attempted.
|
|
|
//
|
|
//
|
|
@@ -119,9 +94,6 @@ class TokenizedBuffer::Lexer {
|
|
|
// more self documenting, and by consuming the actual token constructively
|
|
// more self documenting, and by consuming the actual token constructively
|
|
|
// when one is produced, it helps ensure the correct result is returned.
|
|
// when one is produced, it helps ensure the correct result is returned.
|
|
|
class LexResult {
|
|
class LexResult {
|
|
|
- bool formed_token;
|
|
|
|
|
- explicit LexResult(bool formed_token) : formed_token(formed_token) {}
|
|
|
|
|
-
|
|
|
|
|
public:
|
|
public:
|
|
|
// Consumes (and discard) a valid token to construct a result
|
|
// Consumes (and discard) a valid token to construct a result
|
|
|
// indicating a token has been produced. Relies on implicit conversions.
|
|
// indicating a token has been produced. Relies on implicit conversions.
|
|
@@ -133,24 +105,38 @@ class TokenizedBuffer::Lexer {
|
|
|
|
|
|
|
|
// Tests whether a token was produced by the lexing routine, and
|
|
// Tests whether a token was produced by the lexing routine, and
|
|
|
// the lexer can continue forming tokens.
|
|
// the lexer can continue forming tokens.
|
|
|
- explicit operator bool() const { return formed_token; }
|
|
|
|
|
|
|
+ explicit operator bool() const { return formed_token_; }
|
|
|
|
|
+
|
|
|
|
|
+ private:
|
|
|
|
|
+ explicit LexResult(bool formed_token) : formed_token_(formed_token) {}
|
|
|
|
|
+
|
|
|
|
|
+ bool formed_token_;
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
|
|
+ Lexer(TokenizedBuffer& buffer, DiagnosticConsumer& consumer)
|
|
|
|
|
+ : buffer_(buffer),
|
|
|
|
|
+ translator_(buffer),
|
|
|
|
|
+ emitter_(translator_, consumer),
|
|
|
|
|
+ token_translator_(buffer),
|
|
|
|
|
+ token_emitter_(token_translator_, consumer),
|
|
|
|
|
+ current_line_(buffer.AddLine({0, 0, 0})),
|
|
|
|
|
+ current_line_info_(&buffer.GetLineInfo(current_line_)) {}
|
|
|
|
|
+
|
|
|
// Perform the necessary bookkeeping to step past a newline at the current
|
|
// Perform the necessary bookkeeping to step past a newline at the current
|
|
|
// line and column.
|
|
// line and column.
|
|
|
auto HandleNewline() -> void {
|
|
auto HandleNewline() -> void {
|
|
|
- current_line_info->length = current_column;
|
|
|
|
|
|
|
+ current_line_info_->length = current_column_;
|
|
|
|
|
|
|
|
- current_line =
|
|
|
|
|
- buffer.AddLine({current_line_info->start + current_column + 1, 0, 0});
|
|
|
|
|
- current_line_info = &buffer.GetLineInfo(current_line);
|
|
|
|
|
- current_column = 0;
|
|
|
|
|
- set_indent = false;
|
|
|
|
|
|
|
+ current_line_ = buffer_.AddLine(
|
|
|
|
|
+ {current_line_info_->start + current_column_ + 1, 0, 0});
|
|
|
|
|
+ current_line_info_ = &buffer_.GetLineInfo(current_line_);
|
|
|
|
|
+ current_column_ = 0;
|
|
|
|
|
+ set_indent_ = false;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
auto NoteWhitespace() -> void {
|
|
auto NoteWhitespace() -> void {
|
|
|
- if (!buffer.token_infos_.empty()) {
|
|
|
|
|
- buffer.token_infos_.back().has_trailing_space = true;
|
|
|
|
|
|
|
+ if (!buffer_.token_infos_.empty()) {
|
|
|
|
|
+ buffer_.token_infos_.back().has_trailing_space = true;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -162,16 +148,16 @@ class TokenizedBuffer::Lexer {
|
|
|
// were whitespace.
|
|
// were whitespace.
|
|
|
if (source_text.startswith("//")) {
|
|
if (source_text.startswith("//")) {
|
|
|
// Any comment must be the only non-whitespace on the line.
|
|
// Any comment must be the only non-whitespace on the line.
|
|
|
- if (set_indent) {
|
|
|
|
|
- emitter.EmitError<TrailingComment>(source_text.begin());
|
|
|
|
|
|
|
+ if (set_indent_) {
|
|
|
|
|
+ emitter_.EmitError<TrailingComment>(source_text.begin());
|
|
|
}
|
|
}
|
|
|
// The introducer '//' must be followed by whitespace or EOF.
|
|
// The introducer '//' must be followed by whitespace or EOF.
|
|
|
if (source_text.size() > 2 && !IsSpace(source_text[2])) {
|
|
if (source_text.size() > 2 && !IsSpace(source_text[2])) {
|
|
|
- emitter.EmitError<NoWhitespaceAfterCommentIntroducer>(
|
|
|
|
|
|
|
+ emitter_.EmitError<NoWhitespaceAfterCommentIntroducer>(
|
|
|
source_text.begin() + 2);
|
|
source_text.begin() + 2);
|
|
|
}
|
|
}
|
|
|
while (!source_text.empty() && source_text.front() != '\n') {
|
|
while (!source_text.empty() && source_text.front() != '\n') {
|
|
|
- ++current_column;
|
|
|
|
|
|
|
+ ++current_column_;
|
|
|
source_text = source_text.drop_front();
|
|
source_text = source_text.drop_front();
|
|
|
}
|
|
}
|
|
|
if (source_text.empty()) {
|
|
if (source_text.empty()) {
|
|
@@ -194,7 +180,7 @@ class TokenizedBuffer::Lexer {
|
|
|
// to avoid creating an empty line.
|
|
// to avoid creating an empty line.
|
|
|
source_text = source_text.drop_front();
|
|
source_text = source_text.drop_front();
|
|
|
if (source_text.empty()) {
|
|
if (source_text.empty()) {
|
|
|
- current_line_info->length = current_column;
|
|
|
|
|
|
|
+ current_line_info_->length = current_column_;
|
|
|
return false;
|
|
return false;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -209,7 +195,7 @@ class TokenizedBuffer::Lexer {
|
|
|
// whitespace as well as special handling to allow better tokenization
|
|
// whitespace as well as special handling to allow better tokenization
|
|
|
// of operators. This is just a stub to check that our column
|
|
// of operators. This is just a stub to check that our column
|
|
|
// management works.
|
|
// management works.
|
|
|
- ++current_column;
|
|
|
|
|
|
|
+ ++current_column_;
|
|
|
source_text = source_text.drop_front();
|
|
source_text = source_text.drop_front();
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
@@ -217,7 +203,7 @@ class TokenizedBuffer::Lexer {
|
|
|
|
|
|
|
|
CHECK(source_text.empty()) << "Cannot reach here w/o finishing the text!";
|
|
CHECK(source_text.empty()) << "Cannot reach here w/o finishing the text!";
|
|
|
// Update the line length as this is also the end of a line.
|
|
// Update the line length as this is also the end of a line.
|
|
|
- current_line_info->length = current_column;
|
|
|
|
|
|
|
+ current_line_info_->length = current_column_;
|
|
|
return false;
|
|
return false;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -228,43 +214,43 @@ class TokenizedBuffer::Lexer {
|
|
|
return LexResult::NoMatch();
|
|
return LexResult::NoMatch();
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- int int_column = current_column;
|
|
|
|
|
|
|
+ int int_column = current_column_;
|
|
|
int token_size = literal->Text().size();
|
|
int token_size = literal->Text().size();
|
|
|
- current_column += token_size;
|
|
|
|
|
|
|
+ current_column_ += token_size;
|
|
|
source_text = source_text.drop_front(token_size);
|
|
source_text = source_text.drop_front(token_size);
|
|
|
|
|
|
|
|
- if (!set_indent) {
|
|
|
|
|
- current_line_info->indent = int_column;
|
|
|
|
|
- set_indent = true;
|
|
|
|
|
|
|
+ if (!set_indent_) {
|
|
|
|
|
+ current_line_info_->indent = int_column;
|
|
|
|
|
+ set_indent_ = true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
return VariantMatch(
|
|
return VariantMatch(
|
|
|
- literal->ComputeValue(emitter),
|
|
|
|
|
|
|
+ literal->ComputeValue(emitter_),
|
|
|
[&](LexedNumericLiteral::IntegerValue&& value) {
|
|
[&](LexedNumericLiteral::IntegerValue&& value) {
|
|
|
- auto token = buffer.AddToken({.kind = TokenKind::IntegerLiteral(),
|
|
|
|
|
- .token_line = current_line,
|
|
|
|
|
- .column = int_column});
|
|
|
|
|
- buffer.GetTokenInfo(token).literal_index =
|
|
|
|
|
- buffer.literal_int_storage_.size();
|
|
|
|
|
- buffer.literal_int_storage_.push_back(std::move(value.value));
|
|
|
|
|
|
|
+ auto token = buffer_.AddToken({.kind = TokenKind::IntegerLiteral(),
|
|
|
|
|
+ .token_line = current_line_,
|
|
|
|
|
+ .column = int_column});
|
|
|
|
|
+ buffer_.GetTokenInfo(token).literal_index =
|
|
|
|
|
+ buffer_.literal_int_storage_.size();
|
|
|
|
|
+ buffer_.literal_int_storage_.push_back(std::move(value.value));
|
|
|
return token;
|
|
return token;
|
|
|
},
|
|
},
|
|
|
[&](LexedNumericLiteral::RealValue&& value) {
|
|
[&](LexedNumericLiteral::RealValue&& value) {
|
|
|
- auto token = buffer.AddToken({.kind = TokenKind::RealLiteral(),
|
|
|
|
|
- .token_line = current_line,
|
|
|
|
|
- .column = int_column});
|
|
|
|
|
- buffer.GetTokenInfo(token).literal_index =
|
|
|
|
|
- buffer.literal_int_storage_.size();
|
|
|
|
|
- buffer.literal_int_storage_.push_back(std::move(value.mantissa));
|
|
|
|
|
- buffer.literal_int_storage_.push_back(std::move(value.exponent));
|
|
|
|
|
- assert(buffer.GetRealLiteral(token).IsDecimal() ==
|
|
|
|
|
|
|
+ auto token = buffer_.AddToken({.kind = TokenKind::RealLiteral(),
|
|
|
|
|
+ .token_line = current_line_,
|
|
|
|
|
+ .column = int_column});
|
|
|
|
|
+ buffer_.GetTokenInfo(token).literal_index =
|
|
|
|
|
+ buffer_.literal_int_storage_.size();
|
|
|
|
|
+ buffer_.literal_int_storage_.push_back(std::move(value.mantissa));
|
|
|
|
|
+ buffer_.literal_int_storage_.push_back(std::move(value.exponent));
|
|
|
|
|
+ assert(buffer_.GetRealLiteral(token).IsDecimal() ==
|
|
|
(value.radix == 10));
|
|
(value.radix == 10));
|
|
|
return token;
|
|
return token;
|
|
|
},
|
|
},
|
|
|
[&](LexedNumericLiteral::UnrecoverableError) {
|
|
[&](LexedNumericLiteral::UnrecoverableError) {
|
|
|
- auto token = buffer.AddToken({
|
|
|
|
|
|
|
+ auto token = buffer_.AddToken({
|
|
|
.kind = TokenKind::Error(),
|
|
.kind = TokenKind::Error(),
|
|
|
- .token_line = current_line,
|
|
|
|
|
|
|
+ .token_line = current_line_,
|
|
|
.column = int_column,
|
|
.column = int_column,
|
|
|
.error_length = token_size,
|
|
.error_length = token_size,
|
|
|
});
|
|
});
|
|
@@ -279,39 +265,39 @@ class TokenizedBuffer::Lexer {
|
|
|
return LexResult::NoMatch();
|
|
return LexResult::NoMatch();
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- Line string_line = current_line;
|
|
|
|
|
- int string_column = current_column;
|
|
|
|
|
|
|
+ Line string_line = current_line_;
|
|
|
|
|
+ int string_column = current_column_;
|
|
|
int literal_size = literal->Text().size();
|
|
int literal_size = literal->Text().size();
|
|
|
source_text = source_text.drop_front(literal_size);
|
|
source_text = source_text.drop_front(literal_size);
|
|
|
|
|
|
|
|
- if (!set_indent) {
|
|
|
|
|
- current_line_info->indent = string_column;
|
|
|
|
|
- set_indent = true;
|
|
|
|
|
|
|
+ if (!set_indent_) {
|
|
|
|
|
+ current_line_info_->indent = string_column;
|
|
|
|
|
+ set_indent_ = true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Update line and column information.
|
|
// Update line and column information.
|
|
|
if (!literal->IsMultiLine()) {
|
|
if (!literal->IsMultiLine()) {
|
|
|
- current_column += literal_size;
|
|
|
|
|
|
|
+ current_column_ += literal_size;
|
|
|
} else {
|
|
} else {
|
|
|
for (char c : literal->Text()) {
|
|
for (char c : literal->Text()) {
|
|
|
if (c == '\n') {
|
|
if (c == '\n') {
|
|
|
HandleNewline();
|
|
HandleNewline();
|
|
|
// The indentation of all lines in a multi-line string literal is
|
|
// The indentation of all lines in a multi-line string literal is
|
|
|
// that of the first line.
|
|
// that of the first line.
|
|
|
- current_line_info->indent = string_column;
|
|
|
|
|
- set_indent = true;
|
|
|
|
|
|
|
+ current_line_info_->indent = string_column;
|
|
|
|
|
+ set_indent_ = true;
|
|
|
} else {
|
|
} else {
|
|
|
- ++current_column;
|
|
|
|
|
|
|
+ ++current_column_;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- auto token = buffer.AddToken({.kind = TokenKind::StringLiteral(),
|
|
|
|
|
- .token_line = string_line,
|
|
|
|
|
- .column = string_column});
|
|
|
|
|
- buffer.GetTokenInfo(token).literal_index =
|
|
|
|
|
- buffer.literal_string_storage_.size();
|
|
|
|
|
- buffer.literal_string_storage_.push_back(literal->ComputeValue(emitter));
|
|
|
|
|
|
|
+ auto token = buffer_.AddToken({.kind = TokenKind::StringLiteral(),
|
|
|
|
|
+ .token_line = string_line,
|
|
|
|
|
+ .column = string_column});
|
|
|
|
|
+ buffer_.GetTokenInfo(token).literal_index =
|
|
|
|
|
+ buffer_.literal_string_storage_.size();
|
|
|
|
|
+ buffer_.literal_string_storage_.push_back(literal->ComputeValue(emitter_));
|
|
|
return token;
|
|
return token;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -325,22 +311,22 @@ class TokenizedBuffer::Lexer {
|
|
|
return LexResult::NoMatch();
|
|
return LexResult::NoMatch();
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- if (!set_indent) {
|
|
|
|
|
- current_line_info->indent = current_column;
|
|
|
|
|
- set_indent = true;
|
|
|
|
|
|
|
+ if (!set_indent_) {
|
|
|
|
|
+ current_line_info_->indent = current_column_;
|
|
|
|
|
+ set_indent_ = true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
CloseInvalidOpenGroups(kind);
|
|
CloseInvalidOpenGroups(kind);
|
|
|
|
|
|
|
|
const char* location = source_text.begin();
|
|
const char* location = source_text.begin();
|
|
|
- Token token = buffer.AddToken(
|
|
|
|
|
- {.kind = kind, .token_line = current_line, .column = current_column});
|
|
|
|
|
- current_column += kind.GetFixedSpelling().size();
|
|
|
|
|
|
|
+ Token token = buffer_.AddToken(
|
|
|
|
|
+ {.kind = kind, .token_line = current_line_, .column = current_column_});
|
|
|
|
|
+ current_column_ += kind.GetFixedSpelling().size();
|
|
|
source_text = source_text.drop_front(kind.GetFixedSpelling().size());
|
|
source_text = source_text.drop_front(kind.GetFixedSpelling().size());
|
|
|
|
|
|
|
|
// Opening symbols just need to be pushed onto our queue of opening groups.
|
|
// Opening symbols just need to be pushed onto our queue of opening groups.
|
|
|
if (kind.IsOpeningSymbol()) {
|
|
if (kind.IsOpeningSymbol()) {
|
|
|
- open_groups.push_back(token);
|
|
|
|
|
|
|
+ open_groups_.push_back(token);
|
|
|
return token;
|
|
return token;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -349,22 +335,22 @@ class TokenizedBuffer::Lexer {
|
|
|
return token;
|
|
return token;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- TokenInfo& closing_token_info = buffer.GetTokenInfo(token);
|
|
|
|
|
|
|
+ TokenInfo& closing_token_info = buffer_.GetTokenInfo(token);
|
|
|
|
|
|
|
|
// Check that there is a matching opening symbol before we consume this as
|
|
// Check that there is a matching opening symbol before we consume this as
|
|
|
// a closing symbol.
|
|
// a closing symbol.
|
|
|
- if (open_groups.empty()) {
|
|
|
|
|
|
|
+ if (open_groups_.empty()) {
|
|
|
closing_token_info.kind = TokenKind::Error();
|
|
closing_token_info.kind = TokenKind::Error();
|
|
|
closing_token_info.error_length = kind.GetFixedSpelling().size();
|
|
closing_token_info.error_length = kind.GetFixedSpelling().size();
|
|
|
|
|
|
|
|
- emitter.EmitError<UnmatchedClosing>(location);
|
|
|
|
|
|
|
+ emitter_.EmitError<UnmatchedClosing>(location);
|
|
|
// Note that this still returns true as we do consume a symbol.
|
|
// Note that this still returns true as we do consume a symbol.
|
|
|
return token;
|
|
return token;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Finally can handle a normal closing symbol.
|
|
// Finally can handle a normal closing symbol.
|
|
|
- Token opening_token = open_groups.pop_back_val();
|
|
|
|
|
- TokenInfo& opening_token_info = buffer.GetTokenInfo(opening_token);
|
|
|
|
|
|
|
+ Token opening_token = open_groups_.pop_back_val();
|
|
|
|
|
+ TokenInfo& opening_token_info = buffer_.GetTokenInfo(opening_token);
|
|
|
opening_token_info.closing_token = token;
|
|
opening_token_info.closing_token = token;
|
|
|
closing_token_info.opening_token = opening_token;
|
|
closing_token_info.opening_token = opening_token;
|
|
|
return token;
|
|
return token;
|
|
@@ -404,11 +390,11 @@ class TokenizedBuffer::Lexer {
|
|
|
return LexResult::NoMatch();
|
|
return LexResult::NoMatch();
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- auto token = buffer.AddToken(
|
|
|
|
|
- {.kind = *kind, .token_line = current_line, .column = column});
|
|
|
|
|
- buffer.GetTokenInfo(token).literal_index =
|
|
|
|
|
- buffer.literal_int_storage_.size();
|
|
|
|
|
- buffer.literal_int_storage_.push_back(std::move(suffix_value));
|
|
|
|
|
|
|
+ auto token = buffer_.AddToken(
|
|
|
|
|
+ {.kind = *kind, .token_line = current_line_, .column = column});
|
|
|
|
|
+ buffer_.GetTokenInfo(token).literal_index =
|
|
|
|
|
+ buffer_.literal_int_storage_.size();
|
|
|
|
|
+ buffer_.literal_int_storage_.push_back(std::move(suffix_value));
|
|
|
return token;
|
|
return token;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -419,39 +405,39 @@ class TokenizedBuffer::Lexer {
|
|
|
return;
|
|
return;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- while (!open_groups.empty()) {
|
|
|
|
|
- Token opening_token = open_groups.back();
|
|
|
|
|
- TokenKind opening_kind = buffer.GetTokenInfo(opening_token).kind;
|
|
|
|
|
|
|
+ while (!open_groups_.empty()) {
|
|
|
|
|
+ Token opening_token = open_groups_.back();
|
|
|
|
|
+ TokenKind opening_kind = buffer_.GetTokenInfo(opening_token).kind;
|
|
|
if (kind == opening_kind.GetClosingSymbol()) {
|
|
if (kind == opening_kind.GetClosingSymbol()) {
|
|
|
return;
|
|
return;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- open_groups.pop_back();
|
|
|
|
|
- token_emitter.EmitError<MismatchedClosing>(opening_token);
|
|
|
|
|
|
|
+ open_groups_.pop_back();
|
|
|
|
|
+ token_emitter_.EmitError<MismatchedClosing>(opening_token);
|
|
|
|
|
|
|
|
- CHECK(!buffer.Tokens().empty()) << "Must have a prior opening token!";
|
|
|
|
|
- Token prev_token = buffer.Tokens().end()[-1];
|
|
|
|
|
|
|
+ CHECK(!buffer_.Tokens().empty()) << "Must have a prior opening token!";
|
|
|
|
|
+ Token prev_token = buffer_.Tokens().end()[-1];
|
|
|
|
|
|
|
|
// TODO: do a smarter backwards scan for where to put the closing
|
|
// TODO: do a smarter backwards scan for where to put the closing
|
|
|
// token.
|
|
// token.
|
|
|
- Token closing_token = buffer.AddToken(
|
|
|
|
|
|
|
+ Token closing_token = buffer_.AddToken(
|
|
|
{.kind = opening_kind.GetClosingSymbol(),
|
|
{.kind = opening_kind.GetClosingSymbol(),
|
|
|
- .has_trailing_space = buffer.HasTrailingWhitespace(prev_token),
|
|
|
|
|
|
|
+ .has_trailing_space = buffer_.HasTrailingWhitespace(prev_token),
|
|
|
.is_recovery = true,
|
|
.is_recovery = true,
|
|
|
- .token_line = current_line,
|
|
|
|
|
- .column = current_column});
|
|
|
|
|
- TokenInfo& opening_token_info = buffer.GetTokenInfo(opening_token);
|
|
|
|
|
- TokenInfo& closing_token_info = buffer.GetTokenInfo(closing_token);
|
|
|
|
|
|
|
+ .token_line = current_line_,
|
|
|
|
|
+ .column = current_column_});
|
|
|
|
|
+ TokenInfo& opening_token_info = buffer_.GetTokenInfo(opening_token);
|
|
|
|
|
+ TokenInfo& closing_token_info = buffer_.GetTokenInfo(closing_token);
|
|
|
opening_token_info.closing_token = closing_token;
|
|
opening_token_info.closing_token = closing_token;
|
|
|
closing_token_info.opening_token = opening_token;
|
|
closing_token_info.opening_token = opening_token;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
auto GetOrCreateIdentifier(llvm::StringRef text) -> Identifier {
|
|
auto GetOrCreateIdentifier(llvm::StringRef text) -> Identifier {
|
|
|
- auto insert_result = buffer.identifier_map_.insert(
|
|
|
|
|
- {text, Identifier(buffer.identifier_infos_.size())});
|
|
|
|
|
|
|
+ auto insert_result = buffer_.identifier_map_.insert(
|
|
|
|
|
+ {text, Identifier(buffer_.identifier_infos_.size())});
|
|
|
if (insert_result.second) {
|
|
if (insert_result.second) {
|
|
|
- buffer.identifier_infos_.push_back({text});
|
|
|
|
|
|
|
+ buffer_.identifier_infos_.push_back({text});
|
|
|
}
|
|
}
|
|
|
return insert_result.first->second;
|
|
return insert_result.first->second;
|
|
|
}
|
|
}
|
|
@@ -461,17 +447,17 @@ class TokenizedBuffer::Lexer {
|
|
|
return LexResult::NoMatch();
|
|
return LexResult::NoMatch();
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- if (!set_indent) {
|
|
|
|
|
- current_line_info->indent = current_column;
|
|
|
|
|
- set_indent = true;
|
|
|
|
|
|
|
+ if (!set_indent_) {
|
|
|
|
|
+ current_line_info_->indent = current_column_;
|
|
|
|
|
+ set_indent_ = true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Take the valid characters off the front of the source buffer.
|
|
// Take the valid characters off the front of the source buffer.
|
|
|
llvm::StringRef identifier_text =
|
|
llvm::StringRef identifier_text =
|
|
|
source_text.take_while([](char c) { return IsAlnum(c) || c == '_'; });
|
|
source_text.take_while([](char c) { return IsAlnum(c) || c == '_'; });
|
|
|
CHECK(!identifier_text.empty()) << "Must have at least one character!";
|
|
CHECK(!identifier_text.empty()) << "Must have at least one character!";
|
|
|
- int identifier_column = current_column;
|
|
|
|
|
- current_column += identifier_text.size();
|
|
|
|
|
|
|
+ int identifier_column = current_column_;
|
|
|
|
|
+ current_column_ += identifier_text.size();
|
|
|
source_text = source_text.drop_front(identifier_text.size());
|
|
source_text = source_text.drop_front(identifier_text.size());
|
|
|
|
|
|
|
|
// Check if the text is a type literal, and if so form such a literal.
|
|
// Check if the text is a type literal, and if so form such a literal.
|
|
@@ -486,16 +472,16 @@ class TokenizedBuffer::Lexer {
|
|
|
#include "toolchain/lexer/token_registry.def"
|
|
#include "toolchain/lexer/token_registry.def"
|
|
|
.Default(TokenKind::Error());
|
|
.Default(TokenKind::Error());
|
|
|
if (kind != TokenKind::Error()) {
|
|
if (kind != TokenKind::Error()) {
|
|
|
- return buffer.AddToken({.kind = kind,
|
|
|
|
|
- .token_line = current_line,
|
|
|
|
|
- .column = identifier_column});
|
|
|
|
|
|
|
+ return buffer_.AddToken({.kind = kind,
|
|
|
|
|
+ .token_line = current_line_,
|
|
|
|
|
+ .column = identifier_column});
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Otherwise we have a generic identifier.
|
|
// Otherwise we have a generic identifier.
|
|
|
- return buffer.AddToken({.kind = TokenKind::Identifier(),
|
|
|
|
|
- .token_line = current_line,
|
|
|
|
|
- .column = identifier_column,
|
|
|
|
|
- .id = GetOrCreateIdentifier(identifier_text)});
|
|
|
|
|
|
|
+ return buffer_.AddToken({.kind = TokenKind::Identifier(),
|
|
|
|
|
+ .token_line = current_line_,
|
|
|
|
|
+ .column = identifier_column,
|
|
|
|
|
+ .id = GetOrCreateIdentifier(identifier_text)});
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
auto LexError(llvm::StringRef& source_text) -> LexResult {
|
|
auto LexError(llvm::StringRef& source_text) -> LexResult {
|
|
@@ -522,23 +508,40 @@ class TokenizedBuffer::Lexer {
|
|
|
|
|
|
|
|
// Longer errors get to be two tokens.
|
|
// Longer errors get to be two tokens.
|
|
|
error_text = error_text.substr(0, std::numeric_limits<int32_t>::max());
|
|
error_text = error_text.substr(0, std::numeric_limits<int32_t>::max());
|
|
|
- auto token = buffer.AddToken(
|
|
|
|
|
|
|
+ auto token = buffer_.AddToken(
|
|
|
{.kind = TokenKind::Error(),
|
|
{.kind = TokenKind::Error(),
|
|
|
- .token_line = current_line,
|
|
|
|
|
- .column = current_column,
|
|
|
|
|
|
|
+ .token_line = current_line_,
|
|
|
|
|
+ .column = current_column_,
|
|
|
.error_length = static_cast<int32_t>(error_text.size())});
|
|
.error_length = static_cast<int32_t>(error_text.size())});
|
|
|
- emitter.EmitError<UnrecognizedCharacters>(error_text.begin());
|
|
|
|
|
|
|
+ emitter_.EmitError<UnrecognizedCharacters>(error_text.begin());
|
|
|
|
|
|
|
|
- current_column += error_text.size();
|
|
|
|
|
|
|
+ current_column_ += error_text.size();
|
|
|
source_text = source_text.drop_front(error_text.size());
|
|
source_text = source_text.drop_front(error_text.size());
|
|
|
return token;
|
|
return token;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
auto AddEndOfFileToken() -> void {
|
|
auto AddEndOfFileToken() -> void {
|
|
|
- buffer.AddToken({.kind = TokenKind::EndOfFile(),
|
|
|
|
|
- .token_line = current_line,
|
|
|
|
|
- .column = current_column});
|
|
|
|
|
|
|
+ buffer_.AddToken({.kind = TokenKind::EndOfFile(),
|
|
|
|
|
+ .token_line = current_line_,
|
|
|
|
|
+ .column = current_column_});
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ private:
|
|
|
|
|
+ TokenizedBuffer& buffer_;
|
|
|
|
|
+
|
|
|
|
|
+ SourceBufferLocationTranslator translator_;
|
|
|
|
|
+ LexerDiagnosticEmitter emitter_;
|
|
|
|
|
+
|
|
|
|
|
+ TokenLocationTranslator token_translator_;
|
|
|
|
|
+ TokenDiagnosticEmitter token_emitter_;
|
|
|
|
|
+
|
|
|
|
|
+ Line current_line_;
|
|
|
|
|
+ LineInfo* current_line_info_;
|
|
|
|
|
+
|
|
|
|
|
+ int current_column_ = 0;
|
|
|
|
|
+ bool set_indent_ = false;
|
|
|
|
|
+
|
|
|
|
|
+ llvm::SmallVector<Token, 8> open_groups_;
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
auto TokenizedBuffer::Lex(SourceBuffer& source, DiagnosticConsumer& consumer)
|
|
auto TokenizedBuffer::Lex(SourceBuffer& source, DiagnosticConsumer& consumer)
|