// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "toolchain/parse/context.h"

#include <optional>

#include "common/check.h"
#include "common/ostream.h"
#include "llvm/ADT/STLExtras.h"
#include "toolchain/diagnostics/diagnostic_emitter.h"
#include "toolchain/diagnostics/format_providers.h"
#include "toolchain/lex/token_kind.h"
#include "toolchain/lex/tokenized_buffer.h"
#include "toolchain/parse/node_ids.h"
#include "toolchain/parse/node_kind.h"
#include "toolchain/parse/state.h"
#include "toolchain/parse/tree.h"
#include "toolchain/parse/typed_nodes.h"
  18. namespace Carbon::Parse {
  19. Context::Context(Tree* tree, Lex::TokenizedBuffer* tokens,
  20. DiagnosticConsumer* consumer, llvm::raw_ostream* vlog_stream)
  21. : tree_(tree),
  22. tokens_(tokens),
  23. err_tracker_(*consumer),
  24. emitter_(&err_tracker_, this),
  25. vlog_stream_(vlog_stream),
  26. position_(tokens_->tokens().begin()),
  27. end_(tokens_->tokens().end()) {
  28. CARBON_CHECK(position_ != end_, "Empty TokenizedBuffer");
  29. --end_;
  30. CARBON_CHECK(tokens_->GetKind(*end_) == Lex::TokenKind::FileEnd,
  31. "TokenizedBuffer should end with FileEnd, ended with {0}",
  32. tokens_->GetKind(*end_));
  33. }
  34. auto Context::ReplacePlaceholderNode(int32_t position, NodeKind kind,
  35. Lex::TokenIndex token, bool has_error)
  36. -> void {
  37. CARBON_CHECK(
  38. kind != NodeKind::InvalidParse && kind != NodeKind::InvalidParseSubtree,
  39. "{0} shouldn't occur in Placeholder use-cases", kind);
  40. CARBON_CHECK(position >= 0 && position < tree_->size(),
  41. "position: {0} size: {1}", position, tree_->size());
  42. auto* node_impl = &tree_->node_impls_[position];
  43. CARBON_CHECK(node_impl->kind() == NodeKind::Placeholder);
  44. *node_impl = Tree::NodeImpl(kind, has_error, token);
  45. }
  46. auto Context::ConsumeAndAddOpenParen(Lex::TokenIndex default_token,
  47. NodeKind start_kind)
  48. -> std::optional<Lex::TokenIndex> {
  49. if (auto open_paren = ConsumeIf(Lex::TokenKind::OpenParen)) {
  50. AddLeafNode(start_kind, *open_paren, /*has_error=*/false);
  51. return open_paren;
  52. } else {
  53. CARBON_DIAGNOSTIC(ExpectedParenAfter, Error, "expected `(` after `{0}`",
  54. Lex::TokenKind);
  55. emitter_.Emit(*position_, ExpectedParenAfter,
  56. tokens().GetKind(default_token));
  57. AddLeafNode(start_kind, default_token, /*has_error=*/true);
  58. return std::nullopt;
  59. }
  60. }
  61. auto Context::ConsumeAndAddCloseSymbol(Lex::TokenIndex expected_open,
  62. StateStackEntry state,
  63. NodeKind close_kind) -> void {
  64. Lex::TokenKind open_token_kind = tokens().GetKind(expected_open);
  65. if (!open_token_kind.is_opening_symbol()) {
  66. AddNode(close_kind, state.token, /*has_error=*/true);
  67. } else if (auto close_token = ConsumeIf(open_token_kind.closing_symbol())) {
  68. AddNode(close_kind, *close_token, state.has_error);
  69. } else {
  70. // TODO: Include the location of the matching opening delimiter in the
  71. // diagnostic.
  72. CARBON_DIAGNOSTIC(ExpectedCloseSymbol, Error,
  73. "unexpected tokens before `{0}`", Lex::TokenKind);
  74. emitter_.Emit(*position_, ExpectedCloseSymbol,
  75. open_token_kind.closing_symbol());
  76. SkipTo(tokens().GetMatchedClosingToken(expected_open));
  77. AddNode(close_kind, Consume(), /*has_error=*/true);
  78. }
  79. }
  80. auto Context::ConsumeAndAddLeafNodeIf(Lex::TokenKind token_kind,
  81. NodeKind node_kind) -> bool {
  82. auto token = ConsumeIf(token_kind);
  83. if (!token) {
  84. return false;
  85. }
  86. AddLeafNode(node_kind, *token);
  87. return true;
  88. }
  89. auto Context::ConsumeChecked(Lex::TokenKind kind) -> Lex::TokenIndex {
  90. CARBON_CHECK(PositionIs(kind), "Required {0}, found {1}", kind,
  91. PositionKind());
  92. return Consume();
  93. }
  94. auto Context::FindNextOf(std::initializer_list<Lex::TokenKind> desired_kinds)
  95. -> std::optional<Lex::TokenIndex> {
  96. auto new_position = position_;
  97. while (true) {
  98. Lex::TokenIndex token = *new_position;
  99. Lex::TokenKind kind = tokens().GetKind(token);
  100. if (kind.IsOneOf(desired_kinds)) {
  101. return token;
  102. }
  103. // Step to the next token at the current bracketing level.
  104. if (kind.is_closing_symbol() || kind == Lex::TokenKind::FileEnd) {
  105. // There are no more tokens at this level.
  106. return std::nullopt;
  107. } else if (kind.is_opening_symbol()) {
  108. new_position = Lex::TokenIterator(tokens().GetMatchedClosingToken(token));
  109. // Advance past the closing token.
  110. ++new_position;
  111. } else {
  112. ++new_position;
  113. }
  114. }
  115. }
  116. auto Context::SkipMatchingGroup() -> bool {
  117. if (!PositionKind().is_opening_symbol()) {
  118. return false;
  119. }
  120. SkipTo(tokens().GetMatchedClosingToken(*position_));
  121. ++position_;
  122. return true;
  123. }
  124. auto Context::SkipPastLikelyEnd(Lex::TokenIndex skip_root) -> Lex::TokenIndex {
  125. if (position_ == end_) {
  126. return *(position_ - 1);
  127. }
  128. Lex::LineIndex root_line = tokens().GetLine(skip_root);
  129. int root_line_indent = tokens().GetIndentColumnNumber(root_line);
  130. // We will keep scanning through tokens on the same line as the root or
  131. // lines with greater indentation than root's line.
  132. auto is_same_line_or_indent_greater_than_root = [&](Lex::TokenIndex t) {
  133. Lex::LineIndex l = tokens().GetLine(t);
  134. if (l == root_line) {
  135. return true;
  136. }
  137. return tokens().GetIndentColumnNumber(l) > root_line_indent;
  138. };
  139. do {
  140. if (PositionIs(Lex::TokenKind::CloseCurlyBrace)) {
  141. // Immediately bail out if we hit an unmatched close curly, this will
  142. // pop us up a level of the syntax grouping.
  143. return *(position_ - 1);
  144. }
  145. // We assume that a semicolon is always intended to be the end of the
  146. // current construct.
  147. if (auto semi = ConsumeIf(Lex::TokenKind::Semi)) {
  148. return *semi;
  149. }
  150. // Skip over any matching group of tokens().
  151. if (SkipMatchingGroup()) {
  152. continue;
  153. }
  154. // Otherwise just step forward one token.
  155. ++position_;
  156. } while (position_ != end_ &&
  157. is_same_line_or_indent_greater_than_root(*position_));
  158. return *(position_ - 1);
  159. }
  160. auto Context::SkipTo(Lex::TokenIndex t) -> void {
  161. CARBON_CHECK(t >= *position_, "Tried to skip backwards from {0} to {1}",
  162. position_, Lex::TokenIterator(t));
  163. position_ = Lex::TokenIterator(t);
  164. CARBON_CHECK(position_ != end_, "Skipped past EOF.");
  165. }
  166. // Determines whether the given token is considered to be the start of an
  167. // operand according to the rules for infix operator parsing.
  168. static auto IsAssumedStartOfOperand(Lex::TokenKind kind) -> bool {
  169. return kind.IsOneOf({Lex::TokenKind::OpenParen, Lex::TokenKind::Identifier,
  170. Lex::TokenKind::IntLiteral, Lex::TokenKind::RealLiteral,
  171. Lex::TokenKind::StringLiteral});
  172. }
  173. // Determines whether the given token is considered to be the end of an
  174. // operand according to the rules for infix operator parsing.
  175. static auto IsAssumedEndOfOperand(Lex::TokenKind kind) -> bool {
  176. return kind.IsOneOf(
  177. {Lex::TokenKind::CloseParen, Lex::TokenKind::CloseCurlyBrace,
  178. Lex::TokenKind::CloseSquareBracket, Lex::TokenKind::Identifier,
  179. Lex::TokenKind::IntLiteral, Lex::TokenKind::RealLiteral,
  180. Lex::TokenKind::StringLiteral});
  181. }
  182. // Determines whether the given token could possibly be the start of an
  183. // operand. This is conservatively correct, and will never incorrectly return
  184. // `false`, but can incorrectly return `true`.
  185. static auto IsPossibleStartOfOperand(Lex::TokenKind kind) -> bool {
  186. return !kind.IsOneOf(
  187. {Lex::TokenKind::CloseParen, Lex::TokenKind::CloseCurlyBrace,
  188. Lex::TokenKind::CloseSquareBracket, Lex::TokenKind::Comma,
  189. Lex::TokenKind::Semi, Lex::TokenKind::Colon});
  190. }
  191. auto Context::IsLexicallyValidInfixOperator() -> bool {
  192. CARBON_CHECK(position_ != end_, "Expected an operator token.");
  193. bool leading_space = tokens().HasLeadingWhitespace(*position_);
  194. bool trailing_space = tokens().HasTrailingWhitespace(*position_);
  195. // If there's whitespace on both sides, it's an infix operator.
  196. if (leading_space && trailing_space) {
  197. return true;
  198. }
  199. // If there's whitespace on exactly one side, it's not an infix operator.
  200. if (leading_space || trailing_space) {
  201. return false;
  202. }
  203. // Otherwise, for an infix operator, the preceding token must be any close
  204. // bracket, identifier, or literal and the next token must be an open paren,
  205. // identifier, or literal.
  206. if (position_ == tokens().tokens().begin() ||
  207. !IsAssumedEndOfOperand(tokens().GetKind(*(position_ - 1))) ||
  208. !IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  209. return false;
  210. }
  211. return true;
  212. }
  213. auto Context::IsTrailingOperatorInfix() -> bool {
  214. if (position_ == end_) {
  215. return false;
  216. }
  217. // An operator that follows the infix operator rules is parsed as
  218. // infix, unless the next token means that it can't possibly be.
  219. if (IsLexicallyValidInfixOperator() &&
  220. IsPossibleStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  221. return true;
  222. }
  223. // A trailing operator with leading whitespace that's not valid as infix is
  224. // not valid at all. If the next token looks like the start of an operand,
  225. // then parse as infix, otherwise as postfix. Either way we'll produce a
  226. // diagnostic later on.
  227. if (tokens().HasLeadingWhitespace(*position_) &&
  228. IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  229. return true;
  230. }
  231. return false;
  232. }
  233. auto Context::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  234. if (!PositionKind().is_symbol()) {
  235. // Whitespace-based fixity rules only apply to symbolic operators.
  236. return;
  237. }
  238. if (fixity == OperatorFixity::Infix) {
  239. // Infix operators must satisfy the infix operator rules.
  240. if (!IsLexicallyValidInfixOperator()) {
  241. CARBON_DIAGNOSTIC(BinaryOperatorRequiresWhitespace, Error,
  242. "whitespace missing {0:=-1:before|=0:around|=1:after} "
  243. "binary operator",
  244. IntAsSelect);
  245. IntAsSelect pos(0);
  246. if (tokens().HasLeadingWhitespace(*position_)) {
  247. pos.value = 1;
  248. } else if (tokens().HasTrailingWhitespace(*position_)) {
  249. pos.value = -1;
  250. }
  251. emitter_.Emit(*position_, BinaryOperatorRequiresWhitespace, pos);
  252. }
  253. } else {
  254. bool prefix = fixity == OperatorFixity::Prefix;
  255. // Whitespace is not permitted between a symbolic pre/postfix operator and
  256. // its operand.
  257. if ((prefix ? tokens().HasTrailingWhitespace(*position_)
  258. : tokens().HasLeadingWhitespace(*position_))) {
  259. CARBON_DIAGNOSTIC(
  260. UnaryOperatorHasWhitespace, Error,
  261. "whitespace is not allowed {0:after|before} this unary operator",
  262. BoolAsSelect);
  263. emitter_.Emit(*position_, UnaryOperatorHasWhitespace, prefix);
  264. } else if (IsLexicallyValidInfixOperator()) {
  265. // Pre/postfix operators must not satisfy the infix operator rules.
  266. CARBON_DIAGNOSTIC(
  267. UnaryOperatorRequiresWhitespace, Error,
  268. "whitespace is required {0:before|after} this unary operator",
  269. BoolAsSelect);
  270. emitter_.Emit(*position_, UnaryOperatorRequiresWhitespace, prefix);
  271. }
  272. }
  273. }
  274. auto Context::ConsumeListToken(NodeKind comma_kind, Lex::TokenKind close_kind,
  275. bool already_has_error) -> ListTokenKind {
  276. if (!PositionIs(Lex::TokenKind::Comma) && !PositionIs(close_kind)) {
  277. // Don't error a second time on the same element.
  278. if (!already_has_error) {
  279. CARBON_DIAGNOSTIC(UnexpectedTokenAfterListElement, Error,
  280. "expected `,` or `{0}`", Lex::TokenKind);
  281. emitter_.Emit(*position_, UnexpectedTokenAfterListElement, close_kind);
  282. ReturnErrorOnState();
  283. }
  284. // Recover from the invalid token.
  285. auto end_of_element = FindNextOf({Lex::TokenKind::Comma, close_kind});
  286. // The lexer guarantees that parentheses are balanced.
  287. CARBON_CHECK(end_of_element, "missing matching `{0}` for `{1}`",
  288. close_kind.opening_symbol(), close_kind);
  289. SkipTo(*end_of_element);
  290. }
  291. if (PositionIs(close_kind)) {
  292. return ListTokenKind::Close;
  293. } else {
  294. AddLeafNode(comma_kind, Consume(),
  295. /*has_error=*/comma_kind == NodeKind::InvalidParse);
  296. return PositionIs(close_kind) ? ListTokenKind::CommaClose
  297. : ListTokenKind::Comma;
  298. }
  299. }
  300. auto Context::AddNodeExpectingDeclSemi(StateStackEntry state,
  301. NodeKind node_kind,
  302. Lex::TokenKind decl_kind,
  303. bool is_def_allowed) -> void {
  304. // TODO: This could better handle things like:
  305. // base: { }
  306. // var n: i32;
  307. // ^ Ends up at `n`, instead of `var`.
  308. if (state.has_error) {
  309. RecoverFromDeclError(state, node_kind,
  310. /*skip_past_likely_end=*/true);
  311. return;
  312. }
  313. if (auto semi = ConsumeIf(Lex::TokenKind::Semi)) {
  314. AddNode(node_kind, *semi, /*has_error=*/false);
  315. } else {
  316. if (is_def_allowed) {
  317. DiagnoseExpectedDeclSemiOrDefinition(decl_kind);
  318. } else {
  319. DiagnoseExpectedDeclSemi(decl_kind);
  320. }
  321. RecoverFromDeclError(state, node_kind,
  322. /*skip_past_likely_end=*/true);
  323. }
  324. }
  325. auto Context::RecoverFromDeclError(StateStackEntry state, NodeKind node_kind,
  326. bool skip_past_likely_end) -> void {
  327. auto token = state.token;
  328. if (skip_past_likely_end) {
  329. token = SkipPastLikelyEnd(token);
  330. }
  331. AddNode(node_kind, token, /*has_error=*/true);
  332. }
  333. auto Context::ParseLibraryName(bool accept_default)
  334. -> std::optional<StringLiteralValueId> {
  335. if (auto library_name_token = ConsumeIf(Lex::TokenKind::StringLiteral)) {
  336. AddLeafNode(NodeKind::LibraryName, *library_name_token);
  337. return tokens().GetStringLiteralValue(*library_name_token);
  338. }
  339. if (accept_default) {
  340. if (auto default_token = ConsumeIf(Lex::TokenKind::Default)) {
  341. AddLeafNode(NodeKind::DefaultLibrary, *default_token);
  342. return StringLiteralValueId::None;
  343. }
  344. }
  345. CARBON_DIAGNOSTIC(
  346. ExpectedLibraryNameOrDefault, Error,
  347. "expected `default` or a string literal to specify the library name");
  348. CARBON_DIAGNOSTIC(ExpectedLibraryName, Error,
  349. "expected a string literal to specify the library name");
  350. emitter().Emit(*position(), accept_default ? ExpectedLibraryNameOrDefault
  351. : ExpectedLibraryName);
  352. return std::nullopt;
  353. }
  354. auto Context::ParseLibrarySpecifier(bool accept_default)
  355. -> std::optional<StringLiteralValueId> {
  356. auto library_token = ConsumeChecked(Lex::TokenKind::Library);
  357. auto library_id = ParseLibraryName(accept_default);
  358. if (!library_id) {
  359. AddLeafNode(NodeKind::LibraryName, *position_, /*has_error=*/true);
  360. }
  361. AddNode(NodeKind::LibrarySpecifier, library_token, /*has_error=*/false);
  362. return library_id;
  363. }
  364. auto Context::DiagnoseExpectedDeclSemi(Lex::TokenKind expected_kind) -> void {
  365. CARBON_DIAGNOSTIC(ExpectedDeclSemi, Error,
  366. "`{0}` declarations must end with a `;`", Lex::TokenKind);
  367. emitter().Emit(*position(), ExpectedDeclSemi, expected_kind);
  368. }
  369. auto Context::DiagnoseExpectedDeclSemiOrDefinition(Lex::TokenKind expected_kind)
  370. -> void {
  371. CARBON_DIAGNOSTIC(ExpectedDeclSemiOrDefinition, Error,
  372. "`{0}` declarations must either end with a `;` or "
  373. "have a `{{ ... }` block for a definition",
  374. Lex::TokenKind);
  375. emitter().Emit(*position(), ExpectedDeclSemiOrDefinition, expected_kind);
  376. }
  377. // Returns whether we are currently parsing in a scope in which function
  378. // definitions are deferred, such as a class or interface.
  379. static auto ParsingInDeferredDefinitionScope(Context& context) -> bool {
  380. auto& stack = context.state_stack();
  381. if (stack.size() < 2 || stack.back().state != State::DeclScopeLoop) {
  382. return false;
  383. }
  384. auto state = stack[stack.size() - 2].state;
  385. return state == State::DeclDefinitionFinishAsClass ||
  386. state == State::DeclDefinitionFinishAsImpl ||
  387. state == State::DeclDefinitionFinishAsInterface ||
  388. state == State::DeclDefinitionFinishAsNamedConstraint;
  389. }
  390. auto Context::AddFunctionDefinitionStart(Lex::TokenIndex token, bool has_error)
  391. -> void {
  392. if (ParsingInDeferredDefinitionScope(*this)) {
  393. deferred_definition_stack_.push_back(tree_->deferred_definitions_.Add(
  394. {.start_id =
  395. FunctionDefinitionStartId(NodeId(tree_->node_impls_.size()))}));
  396. }
  397. AddNode(NodeKind::FunctionDefinitionStart, token, has_error);
  398. }
  399. auto Context::AddFunctionDefinition(Lex::TokenIndex token, bool has_error)
  400. -> void {
  401. if (ParsingInDeferredDefinitionScope(*this)) {
  402. auto definition_index = deferred_definition_stack_.pop_back_val();
  403. auto& definition = tree_->deferred_definitions_.Get(definition_index);
  404. definition.definition_id =
  405. FunctionDefinitionId(NodeId(tree_->node_impls_.size()));
  406. definition.next_definition_index =
  407. DeferredDefinitionIndex(tree_->deferred_definitions().size());
  408. }
  409. AddNode(NodeKind::FunctionDefinition, token, has_error);
  410. }
  411. auto Context::PrintForStackDump(llvm::raw_ostream& output) const -> void {
  412. output << "Parser stack:\n";
  413. for (auto [i, entry] : llvm::enumerate(state_stack_)) {
  414. output << "\t" << i << ".\t" << entry.state;
  415. PrintTokenForStackDump(output, entry.token);
  416. }
  417. output << "\tcursor\tposition_";
  418. PrintTokenForStackDump(output, *position_);
  419. }
  420. auto Context::PrintTokenForStackDump(llvm::raw_ostream& output,
  421. Lex::TokenIndex token) const -> void {
  422. output << " @ " << tokens_->GetLineNumber(token) << ":"
  423. << tokens_->GetColumnNumber(token) << ": token " << token << " : "
  424. << tokens_->GetKind(token) << "\n";
  425. }
  426. } // namespace Carbon::Parse