parser_context.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser_context.h"
  5. #include <cstdlib>
  6. #include <memory>
  7. #include <optional>
  8. #include "common/check.h"
  9. #include "toolchain/lexer/token_kind.h"
  10. #include "toolchain/lexer/tokenized_buffer.h"
  11. #include "toolchain/parser/parse_node_kind.h"
  12. #include "toolchain/parser/parse_tree.h"
  13. namespace Carbon {
  14. // A relative location for characters in errors.
  15. enum class RelativeLocation : int8_t {
  16. Around,
  17. After,
  18. Before,
  19. };
  20. // Adapts RelativeLocation for use with formatv.
  21. static auto operator<<(llvm::raw_ostream& out, RelativeLocation loc)
  22. -> llvm::raw_ostream& {
  23. switch (loc) {
  24. case RelativeLocation::Around:
  25. out << "around";
  26. break;
  27. case RelativeLocation::After:
  28. out << "after";
  29. break;
  30. case RelativeLocation::Before:
  31. out << "before";
  32. break;
  33. }
  34. return out;
  35. }
  36. ParserContext::ParserContext(ParseTree& tree, TokenizedBuffer& tokens,
  37. TokenDiagnosticEmitter& emitter,
  38. llvm::raw_ostream* vlog_stream)
  39. : tree_(&tree),
  40. tokens_(&tokens),
  41. emitter_(&emitter),
  42. vlog_stream_(vlog_stream),
  43. position_(tokens_->tokens().begin()),
  44. end_(tokens_->tokens().end()) {
  45. CARBON_CHECK(position_ != end_) << "Empty TokenizedBuffer";
  46. --end_;
  47. CARBON_CHECK(tokens_->GetKind(*end_) == TokenKind::EndOfFile)
  48. << "TokenizedBuffer should end with EndOfFile, ended with "
  49. << tokens_->GetKind(*end_);
  50. }
  51. auto ParserContext::AddLeafNode(ParseNodeKind kind,
  52. TokenizedBuffer::Token token, bool has_error)
  53. -> void {
  54. tree_->node_impls_.push_back(
  55. ParseTree::NodeImpl(kind, has_error, token, /*subtree_size=*/1));
  56. if (has_error) {
  57. tree_->has_errors_ = true;
  58. }
  59. }
  60. auto ParserContext::AddNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  61. int subtree_start, bool has_error) -> void {
  62. int subtree_size = tree_->size() - subtree_start + 1;
  63. tree_->node_impls_.push_back(
  64. ParseTree::NodeImpl(kind, has_error, token, subtree_size));
  65. if (has_error) {
  66. tree_->has_errors_ = true;
  67. }
  68. }
  69. auto ParserContext::ConsumeAndAddOpenParen(TokenizedBuffer::Token default_token,
  70. ParseNodeKind start_kind) -> void {
  71. if (auto open_paren = ConsumeIf(TokenKind::OpenParen)) {
  72. AddLeafNode(start_kind, *open_paren, /*has_error=*/false);
  73. } else {
  74. CARBON_DIAGNOSTIC(ExpectedParenAfter, Error, "Expected `(` after `{0}`.",
  75. TokenKind);
  76. emitter_->Emit(*position_, ExpectedParenAfter,
  77. tokens().GetKind(default_token));
  78. AddLeafNode(start_kind, default_token, /*has_error=*/true);
  79. }
  80. }
  81. auto ParserContext::ConsumeAndAddCloseParen(StateStackEntry state,
  82. ParseNodeKind close_kind) -> void {
  83. // state.token should point at the introducer, with the paren one after the
  84. // introducer.
  85. auto expected_paren = *(TokenizedBuffer::TokenIterator(state.token) + 1);
  86. if (tokens().GetKind(expected_paren) != TokenKind::OpenParen) {
  87. AddNode(close_kind, state.token, state.subtree_start, /*has_error=*/true);
  88. } else if (auto close_token = ConsumeIf(TokenKind::CloseParen)) {
  89. AddNode(close_kind, *close_token, state.subtree_start, state.has_error);
  90. } else {
  91. // TODO: Include the location of the matching open_paren in the diagnostic.
  92. CARBON_DIAGNOSTIC(ExpectedCloseParen, Error,
  93. "Unexpected tokens before `)`.");
  94. emitter_->Emit(*position_, ExpectedCloseParen);
  95. SkipTo(tokens().GetMatchedClosingToken(expected_paren));
  96. AddNode(close_kind, Consume(), state.subtree_start, /*has_error=*/true);
  97. }
  98. }
  99. auto ParserContext::ConsumeAndAddLeafNodeIf(TokenKind token_kind,
  100. ParseNodeKind node_kind) -> bool {
  101. auto token = ConsumeIf(token_kind);
  102. if (!token) {
  103. return false;
  104. }
  105. AddLeafNode(node_kind, *token);
  106. return true;
  107. }
  108. auto ParserContext::ConsumeChecked(TokenKind kind) -> TokenizedBuffer::Token {
  109. CARBON_CHECK(PositionIs(kind))
  110. << "Required " << kind << ", found " << PositionKind();
  111. return Consume();
  112. }
  113. auto ParserContext::ConsumeIf(TokenKind kind)
  114. -> std::optional<TokenizedBuffer::Token> {
  115. if (!PositionIs(kind)) {
  116. return std::nullopt;
  117. }
  118. return Consume();
  119. }
  120. auto ParserContext::ConsumeIfPatternKeyword(TokenKind keyword_token,
  121. ParserState keyword_state,
  122. int subtree_start) -> void {
  123. if (auto token = ConsumeIf(keyword_token)) {
  124. PushState(ParserContext::StateStackEntry(
  125. keyword_state, PrecedenceGroup::ForTopLevelExpression(),
  126. PrecedenceGroup::ForTopLevelExpression(), *token, subtree_start));
  127. }
  128. }
  129. auto ParserContext::FindNextOf(std::initializer_list<TokenKind> desired_kinds)
  130. -> std::optional<TokenizedBuffer::Token> {
  131. auto new_position = position_;
  132. while (true) {
  133. TokenizedBuffer::Token token = *new_position;
  134. TokenKind kind = tokens().GetKind(token);
  135. if (kind.IsOneOf(desired_kinds)) {
  136. return token;
  137. }
  138. // Step to the next token at the current bracketing level.
  139. if (kind.is_closing_symbol() || kind == TokenKind::EndOfFile) {
  140. // There are no more tokens at this level.
  141. return std::nullopt;
  142. } else if (kind.is_opening_symbol()) {
  143. new_position = TokenizedBuffer::TokenIterator(
  144. tokens().GetMatchedClosingToken(token));
  145. // Advance past the closing token.
  146. ++new_position;
  147. } else {
  148. ++new_position;
  149. }
  150. }
  151. }
  152. auto ParserContext::SkipMatchingGroup() -> bool {
  153. if (!PositionKind().is_opening_symbol()) {
  154. return false;
  155. }
  156. SkipTo(tokens().GetMatchedClosingToken(*position_));
  157. ++position_;
  158. return true;
  159. }
  160. auto ParserContext::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root)
  161. -> std::optional<TokenizedBuffer::Token> {
  162. if (position_ == end_) {
  163. return std::nullopt;
  164. }
  165. TokenizedBuffer::Line root_line = tokens().GetLine(skip_root);
  166. int root_line_indent = tokens().GetIndentColumnNumber(root_line);
  167. // We will keep scanning through tokens on the same line as the root or
  168. // lines with greater indentation than root's line.
  169. auto is_same_line_or_indent_greater_than_root =
  170. [&](TokenizedBuffer::Token t) {
  171. TokenizedBuffer::Line l = tokens().GetLine(t);
  172. if (l == root_line) {
  173. return true;
  174. }
  175. return tokens().GetIndentColumnNumber(l) > root_line_indent;
  176. };
  177. do {
  178. if (PositionIs(TokenKind::CloseCurlyBrace)) {
  179. // Immediately bail out if we hit an unmatched close curly, this will
  180. // pop us up a level of the syntax grouping.
  181. return std::nullopt;
  182. }
  183. // We assume that a semicolon is always intended to be the end of the
  184. // current construct.
  185. if (auto semi = ConsumeIf(TokenKind::Semi)) {
  186. return semi;
  187. }
  188. // Skip over any matching group of tokens().
  189. if (SkipMatchingGroup()) {
  190. continue;
  191. }
  192. // Otherwise just step forward one token.
  193. ++position_;
  194. } while (position_ != end_ &&
  195. is_same_line_or_indent_greater_than_root(*position_));
  196. return std::nullopt;
  197. }
  198. auto ParserContext::SkipTo(TokenizedBuffer::Token t) -> void {
  199. CARBON_CHECK(t >= *position_) << "Tried to skip backwards from " << position_
  200. << " to " << TokenizedBuffer::TokenIterator(t);
  201. position_ = TokenizedBuffer::TokenIterator(t);
  202. CARBON_CHECK(position_ != end_) << "Skipped past EOF.";
  203. }
  204. // Determines whether the given token is considered to be the start of an
  205. // operand according to the rules for infix operator parsing.
  206. static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
  207. return kind.IsOneOf({TokenKind::OpenParen, TokenKind::Identifier,
  208. TokenKind::IntegerLiteral, TokenKind::RealLiteral,
  209. TokenKind::StringLiteral});
  210. }
  211. // Determines whether the given token is considered to be the end of an
  212. // operand according to the rules for infix operator parsing.
  213. static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
  214. return kind.IsOneOf({TokenKind::CloseParen, TokenKind::CloseCurlyBrace,
  215. TokenKind::CloseSquareBracket, TokenKind::Identifier,
  216. TokenKind::IntegerLiteral, TokenKind::RealLiteral,
  217. TokenKind::StringLiteral});
  218. }
  219. // Determines whether the given token could possibly be the start of an
  220. // operand. This is conservatively correct, and will never incorrectly return
  221. // `false`, but can incorrectly return `true`.
  222. static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
  223. return !kind.IsOneOf({TokenKind::CloseParen, TokenKind::CloseCurlyBrace,
  224. TokenKind::CloseSquareBracket, TokenKind::Comma,
  225. TokenKind::Semi, TokenKind::Colon});
  226. }
  227. auto ParserContext::IsLexicallyValidInfixOperator() -> bool {
  228. CARBON_CHECK(position_ != end_) << "Expected an operator token.";
  229. bool leading_space = tokens().HasLeadingWhitespace(*position_);
  230. bool trailing_space = tokens().HasTrailingWhitespace(*position_);
  231. // If there's whitespace on both sides, it's an infix operator.
  232. if (leading_space && trailing_space) {
  233. return true;
  234. }
  235. // If there's whitespace on exactly one side, it's not an infix operator.
  236. if (leading_space || trailing_space) {
  237. return false;
  238. }
  239. // Otherwise, for an infix operator, the preceding token must be any close
  240. // bracket, identifier, or literal and the next token must be an open paren,
  241. // identifier, or literal.
  242. if (position_ == tokens().tokens().begin() ||
  243. !IsAssumedEndOfOperand(tokens().GetKind(*(position_ - 1))) ||
  244. !IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  245. return false;
  246. }
  247. return true;
  248. }
  249. auto ParserContext::IsTrailingOperatorInfix() -> bool {
  250. if (position_ == end_) {
  251. return false;
  252. }
  253. // An operator that follows the infix operator rules is parsed as
  254. // infix, unless the next token means that it can't possibly be.
  255. if (IsLexicallyValidInfixOperator() &&
  256. IsPossibleStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  257. return true;
  258. }
  259. // A trailing operator with leading whitespace that's not valid as infix is
  260. // not valid at all. If the next token looks like the start of an operand,
  261. // then parse as infix, otherwise as postfix. Either way we'll produce a
  262. // diagnostic later on.
  263. if (tokens().HasLeadingWhitespace(*position_) &&
  264. IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  265. return true;
  266. }
  267. return false;
  268. }
  269. auto ParserContext::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  270. if (!PositionKind().is_symbol()) {
  271. // Whitespace-based fixity rules only apply to symbolic operators.
  272. return;
  273. }
  274. if (fixity == OperatorFixity::Infix) {
  275. // Infix operators must satisfy the infix operator rules.
  276. if (!IsLexicallyValidInfixOperator()) {
  277. CARBON_DIAGNOSTIC(BinaryOperatorRequiresWhitespace, Error,
  278. "Whitespace missing {0} binary operator.",
  279. RelativeLocation);
  280. emitter_->Emit(*position_, BinaryOperatorRequiresWhitespace,
  281. tokens().HasLeadingWhitespace(*position_)
  282. ? RelativeLocation::After
  283. : (tokens().HasTrailingWhitespace(*position_)
  284. ? RelativeLocation::Before
  285. : RelativeLocation::Around));
  286. }
  287. } else {
  288. bool prefix = fixity == OperatorFixity::Prefix;
  289. // Whitespace is not permitted between a symbolic pre/postfix operator and
  290. // its operand.
  291. if ((prefix ? tokens().HasTrailingWhitespace(*position_)
  292. : tokens().HasLeadingWhitespace(*position_))) {
  293. CARBON_DIAGNOSTIC(UnaryOperatorHasWhitespace, Error,
  294. "Whitespace is not allowed {0} this unary operator.",
  295. RelativeLocation);
  296. emitter_->Emit(
  297. *position_, UnaryOperatorHasWhitespace,
  298. prefix ? RelativeLocation::After : RelativeLocation::Before);
  299. } else if (IsLexicallyValidInfixOperator()) {
  300. // Pre/postfix operators must not satisfy the infix operator rules.
  301. CARBON_DIAGNOSTIC(UnaryOperatorRequiresWhitespace, Error,
  302. "Whitespace is required {0} this unary operator.",
  303. RelativeLocation);
  304. emitter_->Emit(
  305. *position_, UnaryOperatorRequiresWhitespace,
  306. prefix ? RelativeLocation::Before : RelativeLocation::After);
  307. }
  308. }
  309. }
  310. auto ParserContext::ConsumeListToken(ParseNodeKind comma_kind,
  311. TokenKind close_kind,
  312. bool already_has_error) -> ListTokenKind {
  313. if (!PositionIs(TokenKind::Comma) && !PositionIs(close_kind)) {
  314. // Don't error a second time on the same element.
  315. if (!already_has_error) {
  316. CARBON_DIAGNOSTIC(UnexpectedTokenAfterListElement, Error,
  317. "Expected `,` or `{0}`.", TokenKind);
  318. emitter_->Emit(*position_, UnexpectedTokenAfterListElement, close_kind);
  319. ReturnErrorOnState();
  320. }
  321. // Recover from the invalid token.
  322. auto end_of_element = FindNextOf({TokenKind::Comma, close_kind});
  323. // The lexer guarantees that parentheses are balanced.
  324. CARBON_CHECK(end_of_element)
  325. << "missing matching `" << close_kind.opening_symbol() << "` for `"
  326. << close_kind << "`";
  327. SkipTo(*end_of_element);
  328. }
  329. if (PositionIs(close_kind)) {
  330. return ListTokenKind::Close;
  331. } else {
  332. AddLeafNode(comma_kind, Consume());
  333. return PositionIs(close_kind) ? ListTokenKind::CommaClose
  334. : ListTokenKind::Comma;
  335. }
  336. }
  337. auto ParserContext::GetDeclarationContext() -> DeclarationContext {
  338. // i == 0 is the file-level DeclarationScopeLoop. Additionally, i == 1 can be
  339. // skipped because it will never be a DeclarationScopeLoop.
  340. for (int i = state_stack_.size() - 1; i > 1; --i) {
  341. // The declaration context is always the state _above_ a
  342. // DeclarationScopeLoop.
  343. if (state_stack_[i].state == ParserState::DeclarationScopeLoop) {
  344. switch (state_stack_[i - 1].state) {
  345. case ParserState::TypeDefinitionFinishAsClass:
  346. return DeclarationContext::Class;
  347. case ParserState::TypeDefinitionFinishAsInterface:
  348. return DeclarationContext::Interface;
  349. case ParserState::TypeDefinitionFinishAsNamedConstraint:
  350. return DeclarationContext::NamedConstraint;
  351. default:
  352. llvm_unreachable("Missing handling for a declaration scope");
  353. }
  354. }
  355. }
  356. CARBON_CHECK(!state_stack_.empty() &&
  357. state_stack_[0].state == ParserState::DeclarationScopeLoop);
  358. return DeclarationContext::File;
  359. }
  360. auto ParserContext::RecoverFromDeclarationError(StateStackEntry state,
  361. ParseNodeKind parse_node_kind,
  362. bool skip_past_likely_end)
  363. -> void {
  364. auto token = state.token;
  365. if (skip_past_likely_end) {
  366. if (auto semi = SkipPastLikelyEnd(token)) {
  367. token = *semi;
  368. }
  369. }
  370. AddNode(parse_node_kind, token, state.subtree_start,
  371. /*has_error=*/true);
  372. }
  373. auto ParserContext::EmitExpectedDeclarationSemiOrDefinition(
  374. TokenKind expected_kind) -> void {
  375. CARBON_DIAGNOSTIC(ExpectedDeclarationSemiOrDefinition, Error,
  376. "`{0}` should either end with a `;` for a declaration or "
  377. "have a `{{ ... }` block for a definition.",
  378. TokenKind);
  379. emitter().Emit(*position(), ExpectedDeclarationSemiOrDefinition,
  380. expected_kind);
  381. }
  382. } // namespace Carbon