context.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parse/context.h"
  5. #include <optional>
  6. #include "common/check.h"
  7. #include "common/ostream.h"
  8. #include "llvm/ADT/STLExtras.h"
  9. #include "toolchain/lex/token_kind.h"
  10. #include "toolchain/lex/tokenized_buffer.h"
  11. #include "toolchain/parse/node_ids.h"
  12. #include "toolchain/parse/node_kind.h"
  13. #include "toolchain/parse/state.h"
  14. #include "toolchain/parse/tree.h"
  15. #include "toolchain/parse/typed_nodes.h"
namespace Carbon::Parse {

// A relative location for characters in errors.
//
// Used as the formatted argument of the whitespace-related operator
// diagnostics below; rendered as "around"/"after"/"before" by the
// `llvm::format_provider` specialization that follows.
enum class RelativeLoc : int8_t {
  Around,
  After,
  Before,
};

}  // namespace Carbon::Parse
  24. // Adapts RelativeLoc for use with formatv.
  25. template <>
  26. struct llvm::format_provider<Carbon::Parse::RelativeLoc> {
  27. using RelativeLoc = Carbon::Parse::RelativeLoc;
  28. static void format(const RelativeLoc& loc, raw_ostream& out,
  29. StringRef /*style*/) {
  30. switch (loc) {
  31. case RelativeLoc::Around:
  32. out << "around";
  33. break;
  34. case RelativeLoc::After:
  35. out << "after";
  36. break;
  37. case RelativeLoc::Before:
  38. out << "before";
  39. break;
  40. }
  41. }
  42. };
namespace Carbon::Parse {

// Initializes parsing state for `tokens`, producing output into `tree`.
// All pointers/references must outlive this context.
Context::Context(Tree& tree, Lex::TokenizedBuffer& tokens,
                 Lex::TokenDiagnosticEmitter& emitter,
                 llvm::raw_ostream* vlog_stream)
    : tree_(&tree),
      tokens_(&tokens),
      emitter_(&emitter),
      vlog_stream_(vlog_stream),
      position_(tokens_->tokens().begin()),
      end_(tokens_->tokens().end()) {
  CARBON_CHECK(position_ != end_) << "Empty TokenizedBuffer";
  // Adjust `end_` to point at the trailing `FileEnd` token rather than
  // one-past-it, so the parse position never advances past `FileEnd`.
  --end_;
  CARBON_CHECK(tokens_->GetKind(*end_) == Lex::TokenKind::FileEnd)
      << "TokenizedBuffer should end with FileEnd, ended with "
      << tokens_->GetKind(*end_);
}
  59. auto Context::AddLeafNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
  60. -> void {
  61. tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
  62. }
  63. auto Context::AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
  64. -> void {
  65. tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
  66. }
  67. auto Context::ReplacePlaceholderNode(int32_t position, NodeKind kind,
  68. Lex::TokenIndex token, bool has_error)
  69. -> void {
  70. CARBON_CHECK(position >= 0 && position < tree_->size())
  71. << "position: " << position << " size: " << tree_->size();
  72. auto* node_impl = &tree_->node_impls_[position];
  73. CARBON_CHECK(node_impl->kind == NodeKind::Placeholder);
  74. node_impl->kind = kind;
  75. node_impl->has_error = has_error;
  76. node_impl->token = token;
  77. }
  78. auto Context::ConsumeAndAddOpenParen(Lex::TokenIndex default_token,
  79. NodeKind start_kind)
  80. -> std::optional<Lex::TokenIndex> {
  81. if (auto open_paren = ConsumeIf(Lex::TokenKind::OpenParen)) {
  82. AddLeafNode(start_kind, *open_paren, /*has_error=*/false);
  83. return open_paren;
  84. } else {
  85. CARBON_DIAGNOSTIC(ExpectedParenAfter, Error, "Expected `(` after `{0}`.",
  86. Lex::TokenKind);
  87. emitter_->Emit(*position_, ExpectedParenAfter,
  88. tokens().GetKind(default_token));
  89. AddLeafNode(start_kind, default_token, /*has_error=*/true);
  90. return std::nullopt;
  91. }
  92. }
// Adds a `close_kind` node for the delimiter matching `expected_open`.
// Handles three cases: the state's token wasn't an opening symbol at all,
// the expected close is present, or recovery is needed.
auto Context::ConsumeAndAddCloseSymbol(Lex::TokenIndex expected_open,
                                       StateStackEntry state,
                                       NodeKind close_kind) -> void {
  Lex::TokenKind open_token_kind = tokens().GetKind(expected_open);
  if (!open_token_kind.is_opening_symbol()) {
    // There was no opening symbol, so there is no matched close to find;
    // produce an error node at the state's own token.
    AddNode(close_kind, state.token, /*has_error=*/true);
  } else if (auto close_token = ConsumeIf(open_token_kind.closing_symbol())) {
    // The expected close is next; propagate any error already on the state.
    AddNode(close_kind, *close_token, state.has_error);
  } else {
    // TODO: Include the location of the matching opening delimiter in the
    // diagnostic.
    CARBON_DIAGNOSTIC(ExpectedCloseSymbol, Error,
                      "Unexpected tokens before `{0}`.", llvm::StringLiteral);
    emitter_->Emit(*position_, ExpectedCloseSymbol,
                   open_token_kind.closing_symbol().fixed_spelling());
    // Skip to the close token the lexer matched with `expected_open` and
    // consume it for the error node.
    SkipTo(tokens().GetMatchedClosingToken(expected_open));
    AddNode(close_kind, Consume(), /*has_error=*/true);
  }
}
  112. auto Context::ConsumeAndAddLeafNodeIf(Lex::TokenKind token_kind,
  113. NodeKind node_kind) -> bool {
  114. auto token = ConsumeIf(token_kind);
  115. if (!token) {
  116. return false;
  117. }
  118. AddLeafNode(node_kind, *token);
  119. return true;
  120. }
// Consumes the current token, which is required to be of the given `kind`.
auto Context::ConsumeChecked(Lex::TokenKind kind) -> Lex::TokenIndex {
  CARBON_CHECK(PositionIs(kind))
      << "Required " << kind << ", found " << PositionKind();
  return Consume();
}
  126. auto Context::ConsumeIf(Lex::TokenKind kind) -> std::optional<Lex::TokenIndex> {
  127. if (!PositionIs(kind)) {
  128. return std::nullopt;
  129. }
  130. return Consume();
  131. }
  132. auto Context::FindNextOf(std::initializer_list<Lex::TokenKind> desired_kinds)
  133. -> std::optional<Lex::TokenIndex> {
  134. auto new_position = position_;
  135. while (true) {
  136. Lex::TokenIndex token = *new_position;
  137. Lex::TokenKind kind = tokens().GetKind(token);
  138. if (kind.IsOneOf(desired_kinds)) {
  139. return token;
  140. }
  141. // Step to the next token at the current bracketing level.
  142. if (kind.is_closing_symbol() || kind == Lex::TokenKind::FileEnd) {
  143. // There are no more tokens at this level.
  144. return std::nullopt;
  145. } else if (kind.is_opening_symbol()) {
  146. new_position = Lex::TokenIterator(tokens().GetMatchedClosingToken(token));
  147. // Advance past the closing token.
  148. ++new_position;
  149. } else {
  150. ++new_position;
  151. }
  152. }
  153. }
  154. auto Context::SkipMatchingGroup() -> bool {
  155. if (!PositionKind().is_opening_symbol()) {
  156. return false;
  157. }
  158. SkipTo(tokens().GetMatchedClosingToken(*position_));
  159. ++position_;
  160. return true;
  161. }
// Skips forward to what is most likely the end of the construct rooted at
// `skip_root`, using line/indent heuristics, and returns the last token that
// the skip covered.
auto Context::SkipPastLikelyEnd(Lex::TokenIndex skip_root) -> Lex::TokenIndex {
  if (position_ == end_) {
    // Already at `FileEnd`; the previous token is the best end we have.
    return *(position_ - 1);
  }
  Lex::LineIndex root_line = tokens().GetLine(skip_root);
  int root_line_indent = tokens().GetIndentColumnNumber(root_line);
  // We will keep scanning through tokens on the same line as the root or
  // lines with greater indentation than root's line.
  auto is_same_line_or_indent_greater_than_root = [&](Lex::TokenIndex t) {
    Lex::LineIndex l = tokens().GetLine(t);
    if (l == root_line) {
      return true;
    }
    return tokens().GetIndentColumnNumber(l) > root_line_indent;
  };
  do {
    if (PositionIs(Lex::TokenKind::CloseCurlyBrace)) {
      // Immediately bail out if we hit an unmatched close curly, this will
      // pop us up a level of the syntax grouping.
      return *(position_ - 1);
    }
    // We assume that a semicolon is always intended to be the end of the
    // current construct.
    if (auto semi = ConsumeIf(Lex::TokenKind::Semi)) {
      return *semi;
    }
    // Skip over any matching group of tokens().
    if (SkipMatchingGroup()) {
      continue;
    }
    // Otherwise just step forward one token.
    ++position_;
  } while (position_ != end_ &&
           is_same_line_or_indent_greater_than_root(*position_));
  // Fell off the end of the heuristic region without seeing `;` or `}`; the
  // token just before the current position is the likely end.
  return *(position_ - 1);
}
// Moves the parse position forward to token `t`. `t` must not be behind the
// current position, and must not be past the trailing `FileEnd` token.
auto Context::SkipTo(Lex::TokenIndex t) -> void {
  CARBON_CHECK(t >= *position_) << "Tried to skip backwards from " << position_
                                << " to " << Lex::TokenIterator(t);
  position_ = Lex::TokenIterator(t);
  CARBON_CHECK(position_ != end_) << "Skipped past EOF.";
}
  204. // Determines whether the given token is considered to be the start of an
  205. // operand according to the rules for infix operator parsing.
  206. static auto IsAssumedStartOfOperand(Lex::TokenKind kind) -> bool {
  207. return kind.IsOneOf({Lex::TokenKind::OpenParen, Lex::TokenKind::Identifier,
  208. Lex::TokenKind::IntLiteral, Lex::TokenKind::RealLiteral,
  209. Lex::TokenKind::StringLiteral});
  210. }
  211. // Determines whether the given token is considered to be the end of an
  212. // operand according to the rules for infix operator parsing.
  213. static auto IsAssumedEndOfOperand(Lex::TokenKind kind) -> bool {
  214. return kind.IsOneOf(
  215. {Lex::TokenKind::CloseParen, Lex::TokenKind::CloseCurlyBrace,
  216. Lex::TokenKind::CloseSquareBracket, Lex::TokenKind::Identifier,
  217. Lex::TokenKind::IntLiteral, Lex::TokenKind::RealLiteral,
  218. Lex::TokenKind::StringLiteral});
  219. }
  220. // Determines whether the given token could possibly be the start of an
  221. // operand. This is conservatively correct, and will never incorrectly return
  222. // `false`, but can incorrectly return `true`.
  223. static auto IsPossibleStartOfOperand(Lex::TokenKind kind) -> bool {
  224. return !kind.IsOneOf(
  225. {Lex::TokenKind::CloseParen, Lex::TokenKind::CloseCurlyBrace,
  226. Lex::TokenKind::CloseSquareBracket, Lex::TokenKind::Comma,
  227. Lex::TokenKind::Semi, Lex::TokenKind::Colon});
  228. }
// Returns whether the token at the current position is lexically usable as
// an infix operator, based on its surrounding whitespace and, when there's
// no whitespace at all, its neighboring tokens.
auto Context::IsLexicallyValidInfixOperator() -> bool {
  CARBON_CHECK(position_ != end_) << "Expected an operator token.";
  bool leading_space = tokens().HasLeadingWhitespace(*position_);
  bool trailing_space = tokens().HasTrailingWhitespace(*position_);
  // If there's whitespace on both sides, it's an infix operator.
  if (leading_space && trailing_space) {
    return true;
  }
  // If there's whitespace on exactly one side, it's not an infix operator.
  if (leading_space || trailing_space) {
    return false;
  }
  // Otherwise, for an infix operator, the preceding token must be any close
  // bracket, identifier, or literal and the next token must be an open paren,
  // identifier, or literal.
  // Note: `position_ + 1` is safe because `position_ != end_` and `end_`
  // points at `FileEnd`.
  if (position_ == tokens().tokens().begin() ||
      !IsAssumedEndOfOperand(tokens().GetKind(*(position_ - 1))) ||
      !IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
    return false;
  }
  return true;
}
// Returns whether the operator at the current position should be parsed as
// infix (rather than postfix), combining the lexical infix rules with a
// lookahead at the following token.
auto Context::IsTrailingOperatorInfix() -> bool {
  if (position_ == end_) {
    return false;
  }
  // An operator that follows the infix operator rules is parsed as
  // infix, unless the next token means that it can't possibly be.
  if (IsLexicallyValidInfixOperator() &&
      IsPossibleStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
    return true;
  }
  // A trailing operator with leading whitespace that's not valid as infix is
  // not valid at all. If the next token looks like the start of an operand,
  // then parse as infix, otherwise as postfix. Either way we'll produce a
  // diagnostic later on.
  if (tokens().HasLeadingWhitespace(*position_) &&
      IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
    return true;
  }
  return false;
}
// Emits a diagnostic if the symbolic operator at the current position has
// whitespace that is inconsistent with the given fixity. Non-symbolic
// (keyword) operators are exempt from the whitespace rules.
auto Context::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  if (!PositionKind().is_symbol()) {
    // Whitespace-based fixity rules only apply to symbolic operators.
    return;
  }
  if (fixity == OperatorFixity::Infix) {
    // Infix operators must satisfy the infix operator rules.
    if (!IsLexicallyValidInfixOperator()) {
      CARBON_DIAGNOSTIC(BinaryOperatorRequiresWhitespace, Error,
                        "Whitespace missing {0} binary operator.", RelativeLoc);
      // Report which side is missing whitespace: leading-only means it's
      // missing after, trailing-only means before, neither means around.
      emitter_->Emit(*position_, BinaryOperatorRequiresWhitespace,
                     tokens().HasLeadingWhitespace(*position_)
                         ? RelativeLoc::After
                         : (tokens().HasTrailingWhitespace(*position_)
                                ? RelativeLoc::Before
                                : RelativeLoc::Around));
    }
  } else {
    bool prefix = fixity == OperatorFixity::Prefix;
    // Whitespace is not permitted between a symbolic pre/postfix operator and
    // its operand.
    if ((prefix ? tokens().HasTrailingWhitespace(*position_)
                : tokens().HasLeadingWhitespace(*position_))) {
      CARBON_DIAGNOSTIC(UnaryOperatorHasWhitespace, Error,
                        "Whitespace is not allowed {0} this unary operator.",
                        RelativeLoc);
      emitter_->Emit(*position_, UnaryOperatorHasWhitespace,
                     prefix ? RelativeLoc::After : RelativeLoc::Before);
    } else if (IsLexicallyValidInfixOperator()) {
      // Pre/postfix operators must not satisfy the infix operator rules.
      CARBON_DIAGNOSTIC(UnaryOperatorRequiresWhitespace, Error,
                        "Whitespace is required {0} this unary operator.",
                        RelativeLoc);
      emitter_->Emit(*position_, UnaryOperatorRequiresWhitespace,
                     prefix ? RelativeLoc::Before : RelativeLoc::After);
    }
  }
}
// Consumes the separator or close token after a list element, returning
// which was found. On an unexpected token, diagnoses (unless this element
// already errored) and skips ahead to the next `,` or `close_kind` at the
// current bracketing level before classifying.
auto Context::ConsumeListToken(NodeKind comma_kind, Lex::TokenKind close_kind,
                               bool already_has_error) -> ListTokenKind {
  if (!PositionIs(Lex::TokenKind::Comma) && !PositionIs(close_kind)) {
    // Don't error a second time on the same element.
    if (!already_has_error) {
      CARBON_DIAGNOSTIC(UnexpectedTokenAfterListElement, Error,
                        "Expected `,` or `{0}`.", Lex::TokenKind);
      emitter_->Emit(*position_, UnexpectedTokenAfterListElement, close_kind);
      ReturnErrorOnState();
    }
    // Recover from the invalid token.
    auto end_of_element = FindNextOf({Lex::TokenKind::Comma, close_kind});
    // The lexer guarantees that parentheses are balanced.
    CARBON_CHECK(end_of_element)
        << "missing matching `" << close_kind.opening_symbol() << "` for `"
        << close_kind << "`";
    SkipTo(*end_of_element);
  }
  if (PositionIs(close_kind)) {
    return ListTokenKind::Close;
  } else {
    // Consume the `,` and report whether the close immediately follows it.
    AddLeafNode(comma_kind, Consume());
    return PositionIs(close_kind) ? ListTokenKind::CommaClose
                                  : ListTokenKind::Comma;
  }
}
  335. auto Context::AddNodeExpectingDeclSemi(StateStackEntry state,
  336. NodeKind node_kind,
  337. Lex::TokenKind decl_kind,
  338. bool is_def_allowed) -> void {
  339. // TODO: This could better handle things like:
  340. // base: { }
  341. // var n: i32;
  342. // ^ Ends up at `n`, instead of `var`.
  343. if (state.has_error) {
  344. RecoverFromDeclError(state, node_kind,
  345. /*skip_past_likely_end=*/true);
  346. return;
  347. }
  348. if (auto semi = ConsumeIf(Lex::TokenKind::Semi)) {
  349. AddNode(node_kind, *semi, /*has_error=*/false);
  350. } else {
  351. if (is_def_allowed) {
  352. DiagnoseExpectedDeclSemiOrDefinition(decl_kind);
  353. } else {
  354. DiagnoseExpectedDeclSemi(decl_kind);
  355. }
  356. RecoverFromDeclError(state, node_kind,
  357. /*skip_past_likely_end=*/true);
  358. }
  359. }
  360. auto Context::RecoverFromDeclError(StateStackEntry state, NodeKind node_kind,
  361. bool skip_past_likely_end) -> void {
  362. auto token = state.token;
  363. if (skip_past_likely_end) {
  364. token = SkipPastLikelyEnd(token);
  365. }
  366. AddNode(node_kind, token, /*has_error=*/true);
  367. }
  368. auto Context::ParseLibraryName(bool accept_default)
  369. -> std::optional<StringLiteralValueId> {
  370. if (auto library_name_token = ConsumeIf(Lex::TokenKind::StringLiteral)) {
  371. AddLeafNode(NodeKind::LibraryName, *library_name_token);
  372. return tokens().GetStringLiteralValue(*library_name_token);
  373. }
  374. if (accept_default) {
  375. if (auto default_token = ConsumeIf(Lex::TokenKind::Default)) {
  376. AddLeafNode(NodeKind::DefaultLibrary, *default_token);
  377. return StringLiteralValueId::Invalid;
  378. }
  379. }
  380. CARBON_DIAGNOSTIC(
  381. ExpectedLibraryNameOrDefault, Error,
  382. "Expected `default` or a string literal to specify the library name.");
  383. CARBON_DIAGNOSTIC(ExpectedLibraryName, Error,
  384. "Expected a string literal to specify the library name.");
  385. emitter().Emit(*position(), accept_default ? ExpectedLibraryNameOrDefault
  386. : ExpectedLibraryName);
  387. return std::nullopt;
  388. }
  389. auto Context::ParseLibrarySpecifier(bool accept_default)
  390. -> std::optional<StringLiteralValueId> {
  391. auto library_token = ConsumeChecked(Lex::TokenKind::Library);
  392. auto library_id = ParseLibraryName(accept_default);
  393. if (!library_id) {
  394. AddLeafNode(NodeKind::LibraryName, *position_, /*has_error=*/true);
  395. }
  396. AddNode(NodeKind::LibrarySpecifier, library_token, /*has_error=*/false);
  397. return library_id;
  398. }
// Diagnoses a declaration of kind `expected_kind` that is missing its
// terminating `;`.
auto Context::DiagnoseExpectedDeclSemi(Lex::TokenKind expected_kind) -> void {
  CARBON_DIAGNOSTIC(ExpectedDeclSemi, Error,
                    "`{0}` declarations must end with a `;`.", Lex::TokenKind);
  emitter().Emit(*position(), ExpectedDeclSemi, expected_kind);
}
// Diagnoses a declaration of kind `expected_kind` that is missing both a
// terminating `;` and a `{ ... }` definition body.
auto Context::DiagnoseExpectedDeclSemiOrDefinition(Lex::TokenKind expected_kind)
    -> void {
  CARBON_DIAGNOSTIC(ExpectedDeclSemiOrDefinition, Error,
                    "`{0}` declarations must either end with a `;` or "
                    "have a `{{ ... }` block for a definition.",
                    Lex::TokenKind);
  emitter().Emit(*position(), ExpectedDeclSemiOrDefinition, expected_kind);
}
  412. // Returns whether we are currently parsing in a scope in which function
  413. // definitions are deferred, such as a class or interface.
  414. static auto ParsingInDeferredDefinitionScope(Context& context) -> bool {
  415. auto& stack = context.state_stack();
  416. if (stack.size() < 2 || stack.back().state != State::DeclScopeLoop) {
  417. return false;
  418. }
  419. auto state = stack[stack.size() - 2].state;
  420. return state == State::DeclDefinitionFinishAsClass ||
  421. state == State::DeclDefinitionFinishAsImpl ||
  422. state == State::DeclDefinitionFinishAsInterface ||
  423. state == State::DeclDefinitionFinishAsNamedConstraint;
  424. }
// Adds a FunctionDefinitionStart node. In a deferred-definition scope, also
// registers a new deferred definition whose `start_id` is the node about to
// be added, and pushes its index for AddFunctionDefinition to complete.
auto Context::AddFunctionDefinitionStart(Lex::TokenIndex token, bool has_error)
    -> void {
  if (ParsingInDeferredDefinitionScope(*this)) {
    // `node_impls_.size()` is the id the next-added node will get.
    deferred_definition_stack_.push_back(tree_->deferred_definitions_.Add(
        {.start_id =
             FunctionDefinitionStartId(NodeId(tree_->node_impls_.size()))}));
  }
  AddNode(NodeKind::FunctionDefinitionStart, token, has_error);
}
// Adds a FunctionDefinition node. In a deferred-definition scope, completes
// the deferred definition opened by the matching AddFunctionDefinitionStart:
// records its definition node id and the index of the next deferred
// definition following it.
auto Context::AddFunctionDefinition(Lex::TokenIndex token, bool has_error)
    -> void {
  if (ParsingInDeferredDefinitionScope(*this)) {
    auto definition_index = deferred_definition_stack_.pop_back_val();
    auto& definition = tree_->deferred_definitions_.Get(definition_index);
    // `node_impls_.size()` is the id the next-added node will get.
    definition.definition_id =
        FunctionDefinitionId(NodeId(tree_->node_impls_.size()));
    definition.next_definition_index =
        DeferredDefinitionIndex(tree_->deferred_definitions().size());
  }
  AddNode(NodeKind::FunctionDefinition, token, has_error);
}
  446. auto Context::PrintForStackDump(llvm::raw_ostream& output) const -> void {
  447. output << "Parser stack:\n";
  448. for (auto [i, entry] : llvm::enumerate(state_stack_)) {
  449. output << "\t" << i << ".\t" << entry.state;
  450. PrintTokenForStackDump(output, entry.token);
  451. }
  452. output << "\tcursor\tposition_";
  453. PrintTokenForStackDump(output, *position_);
  454. }
  455. auto Context::PrintTokenForStackDump(llvm::raw_ostream& output,
  456. Lex::TokenIndex token) const -> void {
  457. output << " @ " << tokens_->GetLineNumber(tokens_->GetLine(token)) << ":"
  458. << tokens_->GetColumnNumber(token) << ": token " << token << " : "
  459. << tokens_->GetKind(token) << "\n";
  460. }
  461. } // namespace Carbon::Parse