context.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parse/context.h"
  5. #include <optional>
  6. #include "common/check.h"
  7. #include "common/ostream.h"
  8. #include "llvm/ADT/STLExtras.h"
  9. #include "toolchain/lex/token_kind.h"
  10. #include "toolchain/lex/tokenized_buffer.h"
  11. #include "toolchain/parse/node_kind.h"
  12. #include "toolchain/parse/tree.h"
  13. namespace Carbon::Parse {
  14. // A relative location for characters in errors.
  15. enum class RelativeLocation : int8_t {
  16. Around,
  17. After,
  18. Before,
  19. };
  20. // Adapts RelativeLocation for use with formatv.
  21. static auto operator<<(llvm::raw_ostream& out, RelativeLocation loc)
  22. -> llvm::raw_ostream& {
  23. switch (loc) {
  24. case RelativeLocation::Around:
  25. out << "around";
  26. break;
  27. case RelativeLocation::After:
  28. out << "after";
  29. break;
  30. case RelativeLocation::Before:
  31. out << "before";
  32. break;
  33. }
  34. return out;
  35. }
  36. Context::Context(Tree& tree, Lex::TokenizedBuffer& tokens,
  37. Lex::TokenDiagnosticEmitter& emitter,
  38. llvm::raw_ostream* vlog_stream)
  39. : tree_(&tree),
  40. tokens_(&tokens),
  41. emitter_(&emitter),
  42. vlog_stream_(vlog_stream),
  43. position_(tokens_->tokens().begin()),
  44. end_(tokens_->tokens().end()) {
  45. CARBON_CHECK(position_ != end_) << "Empty TokenizedBuffer";
  46. --end_;
  47. CARBON_CHECK(tokens_->GetKind(*end_) == Lex::TokenKind::EndOfFile)
  48. << "TokenizedBuffer should end with EndOfFile, ended with "
  49. << tokens_->GetKind(*end_);
  50. }
  51. auto Context::AddLeafNode(NodeKind kind, Lex::Token token, bool has_error)
  52. -> void {
  53. CheckNodeMatchesLexerToken(kind, tokens_->GetKind(token), has_error);
  54. tree_->node_impls_.push_back(
  55. Tree::NodeImpl(kind, has_error, token, /*subtree_size=*/1));
  56. if (has_error) {
  57. tree_->has_errors_ = true;
  58. }
  59. }
  60. auto Context::AddNode(NodeKind kind, Lex::Token token, int subtree_start,
  61. bool has_error) -> void {
  62. CheckNodeMatchesLexerToken(kind, tokens_->GetKind(token), has_error);
  63. int subtree_size = tree_->size() - subtree_start + 1;
  64. tree_->node_impls_.push_back(
  65. Tree::NodeImpl(kind, has_error, token, subtree_size));
  66. if (has_error) {
  67. tree_->has_errors_ = true;
  68. }
  69. }
  70. auto Context::ConsumeAndAddOpenParen(Lex::Token default_token,
  71. NodeKind start_kind)
  72. -> std::optional<Lex::Token> {
  73. if (auto open_paren = ConsumeIf(Lex::TokenKind::OpenParen)) {
  74. AddLeafNode(start_kind, *open_paren, /*has_error=*/false);
  75. return open_paren;
  76. } else {
  77. CARBON_DIAGNOSTIC(ExpectedParenAfter, Error, "Expected `(` after `{0}`.",
  78. Lex::TokenKind);
  79. emitter_->Emit(*position_, ExpectedParenAfter,
  80. tokens().GetKind(default_token));
  81. AddLeafNode(start_kind, default_token, /*has_error=*/true);
  82. return std::nullopt;
  83. }
  84. }
  85. auto Context::ConsumeAndAddCloseSymbol(Lex::Token expected_open,
  86. StateStackEntry state,
  87. NodeKind close_kind) -> void {
  88. Lex::TokenKind open_token_kind = tokens().GetKind(expected_open);
  89. if (!open_token_kind.is_opening_symbol()) {
  90. AddNode(close_kind, state.token, state.subtree_start, /*has_error=*/true);
  91. } else if (auto close_token = ConsumeIf(open_token_kind.closing_symbol())) {
  92. AddNode(close_kind, *close_token, state.subtree_start, state.has_error);
  93. } else {
  94. // TODO: Include the location of the matching opening delimiter in the
  95. // diagnostic.
  96. CARBON_DIAGNOSTIC(ExpectedCloseSymbol, Error,
  97. "Unexpected tokens before `{0}`.", llvm::StringRef);
  98. emitter_->Emit(*position_, ExpectedCloseSymbol,
  99. open_token_kind.closing_symbol().fixed_spelling());
  100. SkipTo(tokens().GetMatchedClosingToken(expected_open));
  101. AddNode(close_kind, Consume(), state.subtree_start, /*has_error=*/true);
  102. }
  103. }
  104. auto Context::ConsumeAndAddLeafNodeIf(Lex::TokenKind token_kind,
  105. NodeKind node_kind) -> bool {
  106. auto token = ConsumeIf(token_kind);
  107. if (!token) {
  108. return false;
  109. }
  110. AddLeafNode(node_kind, *token);
  111. return true;
  112. }
  113. auto Context::ConsumeChecked(Lex::TokenKind kind) -> Lex::Token {
  114. CARBON_CHECK(PositionIs(kind))
  115. << "Required " << kind << ", found " << PositionKind();
  116. return Consume();
  117. }
  118. auto Context::ConsumeIf(Lex::TokenKind kind) -> std::optional<Lex::Token> {
  119. if (!PositionIs(kind)) {
  120. return std::nullopt;
  121. }
  122. return Consume();
  123. }
  124. auto Context::ConsumeIfPatternKeyword(Lex::TokenKind keyword_token,
  125. State keyword_state, int subtree_start)
  126. -> void {
  127. if (auto token = ConsumeIf(keyword_token)) {
  128. PushState(Context::StateStackEntry(
  129. keyword_state, PrecedenceGroup::ForTopLevelExpr(),
  130. PrecedenceGroup::ForTopLevelExpr(), *token, subtree_start));
  131. }
  132. }
  133. auto Context::FindNextOf(std::initializer_list<Lex::TokenKind> desired_kinds)
  134. -> std::optional<Lex::Token> {
  135. auto new_position = position_;
  136. while (true) {
  137. Lex::Token token = *new_position;
  138. Lex::TokenKind kind = tokens().GetKind(token);
  139. if (kind.IsOneOf(desired_kinds)) {
  140. return token;
  141. }
  142. // Step to the next token at the current bracketing level.
  143. if (kind.is_closing_symbol() || kind == Lex::TokenKind::EndOfFile) {
  144. // There are no more tokens at this level.
  145. return std::nullopt;
  146. } else if (kind.is_opening_symbol()) {
  147. new_position = Lex::TokenIterator(tokens().GetMatchedClosingToken(token));
  148. // Advance past the closing token.
  149. ++new_position;
  150. } else {
  151. ++new_position;
  152. }
  153. }
  154. }
  155. auto Context::SkipMatchingGroup() -> bool {
  156. if (!PositionKind().is_opening_symbol()) {
  157. return false;
  158. }
  159. SkipTo(tokens().GetMatchedClosingToken(*position_));
  160. ++position_;
  161. return true;
  162. }
  163. auto Context::SkipPastLikelyEnd(Lex::Token skip_root)
  164. -> std::optional<Lex::Token> {
  165. if (position_ == end_) {
  166. return std::nullopt;
  167. }
  168. Lex::Line root_line = tokens().GetLine(skip_root);
  169. int root_line_indent = tokens().GetIndentColumnNumber(root_line);
  170. // We will keep scanning through tokens on the same line as the root or
  171. // lines with greater indentation than root's line.
  172. auto is_same_line_or_indent_greater_than_root = [&](Lex::Token t) {
  173. Lex::Line l = tokens().GetLine(t);
  174. if (l == root_line) {
  175. return true;
  176. }
  177. return tokens().GetIndentColumnNumber(l) > root_line_indent;
  178. };
  179. do {
  180. if (PositionIs(Lex::TokenKind::CloseCurlyBrace)) {
  181. // Immediately bail out if we hit an unmatched close curly, this will
  182. // pop us up a level of the syntax grouping.
  183. return std::nullopt;
  184. }
  185. // We assume that a semicolon is always intended to be the end of the
  186. // current construct.
  187. if (auto semi = ConsumeIf(Lex::TokenKind::Semi)) {
  188. return semi;
  189. }
  190. // Skip over any matching group of tokens().
  191. if (SkipMatchingGroup()) {
  192. continue;
  193. }
  194. // Otherwise just step forward one token.
  195. ++position_;
  196. } while (position_ != end_ &&
  197. is_same_line_or_indent_greater_than_root(*position_));
  198. return std::nullopt;
  199. }
  200. auto Context::SkipTo(Lex::Token t) -> void {
  201. CARBON_CHECK(t >= *position_) << "Tried to skip backwards from " << position_
  202. << " to " << Lex::TokenIterator(t);
  203. position_ = Lex::TokenIterator(t);
  204. CARBON_CHECK(position_ != end_) << "Skipped past EOF.";
  205. }
  206. // Determines whether the given token is considered to be the start of an
  207. // operand according to the rules for infix operator parsing.
  208. static auto IsAssumedStartOfOperand(Lex::TokenKind kind) -> bool {
  209. return kind.IsOneOf({Lex::TokenKind::OpenParen, Lex::TokenKind::Identifier,
  210. Lex::TokenKind::IntegerLiteral,
  211. Lex::TokenKind::RealLiteral,
  212. Lex::TokenKind::StringLiteral});
  213. }
  214. // Determines whether the given token is considered to be the end of an
  215. // operand according to the rules for infix operator parsing.
  216. static auto IsAssumedEndOfOperand(Lex::TokenKind kind) -> bool {
  217. return kind.IsOneOf(
  218. {Lex::TokenKind::CloseParen, Lex::TokenKind::CloseCurlyBrace,
  219. Lex::TokenKind::CloseSquareBracket, Lex::TokenKind::Identifier,
  220. Lex::TokenKind::IntegerLiteral, Lex::TokenKind::RealLiteral,
  221. Lex::TokenKind::StringLiteral});
  222. }
  223. // Determines whether the given token could possibly be the start of an
  224. // operand. This is conservatively correct, and will never incorrectly return
  225. // `false`, but can incorrectly return `true`.
  226. static auto IsPossibleStartOfOperand(Lex::TokenKind kind) -> bool {
  227. return !kind.IsOneOf(
  228. {Lex::TokenKind::CloseParen, Lex::TokenKind::CloseCurlyBrace,
  229. Lex::TokenKind::CloseSquareBracket, Lex::TokenKind::Comma,
  230. Lex::TokenKind::Semi, Lex::TokenKind::Colon});
  231. }
  232. auto Context::IsLexicallyValidInfixOperator() -> bool {
  233. CARBON_CHECK(position_ != end_) << "Expected an operator token.";
  234. bool leading_space = tokens().HasLeadingWhitespace(*position_);
  235. bool trailing_space = tokens().HasTrailingWhitespace(*position_);
  236. // If there's whitespace on both sides, it's an infix operator.
  237. if (leading_space && trailing_space) {
  238. return true;
  239. }
  240. // If there's whitespace on exactly one side, it's not an infix operator.
  241. if (leading_space || trailing_space) {
  242. return false;
  243. }
  244. // Otherwise, for an infix operator, the preceding token must be any close
  245. // bracket, identifier, or literal and the next token must be an open paren,
  246. // identifier, or literal.
  247. if (position_ == tokens().tokens().begin() ||
  248. !IsAssumedEndOfOperand(tokens().GetKind(*(position_ - 1))) ||
  249. !IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  250. return false;
  251. }
  252. return true;
  253. }
  254. auto Context::IsTrailingOperatorInfix() -> bool {
  255. if (position_ == end_) {
  256. return false;
  257. }
  258. // An operator that follows the infix operator rules is parsed as
  259. // infix, unless the next token means that it can't possibly be.
  260. if (IsLexicallyValidInfixOperator() &&
  261. IsPossibleStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  262. return true;
  263. }
  264. // A trailing operator with leading whitespace that's not valid as infix is
  265. // not valid at all. If the next token looks like the start of an operand,
  266. // then parse as infix, otherwise as postfix. Either way we'll produce a
  267. // diagnostic later on.
  268. if (tokens().HasLeadingWhitespace(*position_) &&
  269. IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  270. return true;
  271. }
  272. return false;
  273. }
  274. auto Context::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  275. if (!PositionKind().is_symbol()) {
  276. // Whitespace-based fixity rules only apply to symbolic operators.
  277. return;
  278. }
  279. if (fixity == OperatorFixity::Infix) {
  280. // Infix operators must satisfy the infix operator rules.
  281. if (!IsLexicallyValidInfixOperator()) {
  282. CARBON_DIAGNOSTIC(BinaryOperatorRequiresWhitespace, Error,
  283. "Whitespace missing {0} binary operator.",
  284. RelativeLocation);
  285. emitter_->Emit(*position_, BinaryOperatorRequiresWhitespace,
  286. tokens().HasLeadingWhitespace(*position_)
  287. ? RelativeLocation::After
  288. : (tokens().HasTrailingWhitespace(*position_)
  289. ? RelativeLocation::Before
  290. : RelativeLocation::Around));
  291. }
  292. } else {
  293. bool prefix = fixity == OperatorFixity::Prefix;
  294. // Whitespace is not permitted between a symbolic pre/postfix operator and
  295. // its operand.
  296. if ((prefix ? tokens().HasTrailingWhitespace(*position_)
  297. : tokens().HasLeadingWhitespace(*position_))) {
  298. CARBON_DIAGNOSTIC(UnaryOperatorHasWhitespace, Error,
  299. "Whitespace is not allowed {0} this unary operator.",
  300. RelativeLocation);
  301. emitter_->Emit(
  302. *position_, UnaryOperatorHasWhitespace,
  303. prefix ? RelativeLocation::After : RelativeLocation::Before);
  304. } else if (IsLexicallyValidInfixOperator()) {
  305. // Pre/postfix operators must not satisfy the infix operator rules.
  306. CARBON_DIAGNOSTIC(UnaryOperatorRequiresWhitespace, Error,
  307. "Whitespace is required {0} this unary operator.",
  308. RelativeLocation);
  309. emitter_->Emit(
  310. *position_, UnaryOperatorRequiresWhitespace,
  311. prefix ? RelativeLocation::Before : RelativeLocation::After);
  312. }
  313. }
  314. }
  315. auto Context::ConsumeListToken(NodeKind comma_kind, Lex::TokenKind close_kind,
  316. bool already_has_error) -> ListTokenKind {
  317. if (!PositionIs(Lex::TokenKind::Comma) && !PositionIs(close_kind)) {
  318. // Don't error a second time on the same element.
  319. if (!already_has_error) {
  320. CARBON_DIAGNOSTIC(UnexpectedTokenAfterListElement, Error,
  321. "Expected `,` or `{0}`.", Lex::TokenKind);
  322. emitter_->Emit(*position_, UnexpectedTokenAfterListElement, close_kind);
  323. ReturnErrorOnState();
  324. }
  325. // Recover from the invalid token.
  326. auto end_of_element = FindNextOf({Lex::TokenKind::Comma, close_kind});
  327. // The lexer guarantees that parentheses are balanced.
  328. CARBON_CHECK(end_of_element)
  329. << "missing matching `" << close_kind.opening_symbol() << "` for `"
  330. << close_kind << "`";
  331. SkipTo(*end_of_element);
  332. }
  333. if (PositionIs(close_kind)) {
  334. return ListTokenKind::Close;
  335. } else {
  336. AddLeafNode(comma_kind, Consume());
  337. return PositionIs(close_kind) ? ListTokenKind::CommaClose
  338. : ListTokenKind::Comma;
  339. }
  340. }
  341. auto Context::GetDeclContext() -> DeclContext {
  342. // i == 0 is the file-level DeclScopeLoop. Additionally, i == 1 can be
  343. // skipped because it will never be a DeclScopeLoop.
  344. for (int i = state_stack_.size() - 1; i > 1; --i) {
  345. // The declaration context is always the state _above_ a
  346. // DeclScopeLoop.
  347. if (state_stack_[i].state == State::DeclScopeLoop) {
  348. switch (state_stack_[i - 1].state) {
  349. case State::TypeDefinitionFinishAsClass:
  350. return DeclContext::Class;
  351. case State::TypeDefinitionFinishAsInterface:
  352. return DeclContext::Interface;
  353. case State::TypeDefinitionFinishAsNamedConstraint:
  354. return DeclContext::NamedConstraint;
  355. default:
  356. llvm_unreachable("Missing handling for a declaration scope");
  357. }
  358. }
  359. }
  360. CARBON_CHECK(!state_stack_.empty() &&
  361. state_stack_[0].state == State::DeclScopeLoop);
  362. return DeclContext::File;
  363. }
  364. auto Context::RecoverFromDeclError(StateStackEntry state,
  365. NodeKind parse_node_kind,
  366. bool skip_past_likely_end) -> void {
  367. auto token = state.token;
  368. if (skip_past_likely_end) {
  369. if (auto semi = SkipPastLikelyEnd(token)) {
  370. token = *semi;
  371. }
  372. }
  373. AddNode(parse_node_kind, token, state.subtree_start,
  374. /*has_error=*/true);
  375. }
  376. auto Context::EmitExpectedDeclSemi(Lex::TokenKind expected_kind) -> void {
  377. CARBON_DIAGNOSTIC(ExpectedDeclSemi, Error,
  378. "`{0}` declarations must end with a `;`.", Lex::TokenKind);
  379. emitter().Emit(*position(), ExpectedDeclSemi, expected_kind);
  380. }
  381. auto Context::EmitExpectedDeclSemiOrDefinition(Lex::TokenKind expected_kind)
  382. -> void {
  383. CARBON_DIAGNOSTIC(ExpectedDeclSemiOrDefinition, Error,
  384. "`{0}` declarations must either end with a `;` or "
  385. "have a `{{ ... }` block for a definition.",
  386. Lex::TokenKind);
  387. emitter().Emit(*position(), ExpectedDeclSemiOrDefinition, expected_kind);
  388. }
  389. auto Context::PrintForStackDump(llvm::raw_ostream& output) const -> void {
  390. output << "Parser stack:\n";
  391. for (auto [i, entry] : llvm::enumerate(state_stack_)) {
  392. output << "\t" << i << ".\t" << entry.state;
  393. PrintTokenForStackDump(output, entry.token);
  394. }
  395. output << "\tcursor\tposition_";
  396. PrintTokenForStackDump(output, *position_);
  397. }
  398. auto Context::PrintTokenForStackDump(llvm::raw_ostream& output,
  399. Lex::Token token) const -> void {
  400. output << " @ " << tokens_->GetLineNumber(tokens_->GetLine(token)) << ":"
  401. << tokens_->GetColumnNumber(token) << ": token " << token << " : "
  402. << tokens_->GetKind(token) << "\n";
  403. }
  404. } // namespace Carbon::Parse