context.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parse/context.h"
  5. #include <optional>
  6. #include "common/check.h"
  7. #include "common/ostream.h"
  8. #include "llvm/ADT/STLExtras.h"
  9. #include "toolchain/lex/token_kind.h"
  10. #include "toolchain/lex/tokenized_buffer.h"
  11. #include "toolchain/parse/node_kind.h"
  12. #include "toolchain/parse/tree.h"
  13. namespace Carbon::Parse {
  14. // A relative location for characters in errors.
  15. enum class RelativeLocation : int8_t {
  16. Around,
  17. After,
  18. Before,
  19. };
  20. } // namespace Carbon::Parse
  21. namespace llvm {
  22. // Adapts RelativeLocation for use with formatv.
  23. template <>
  24. struct format_provider<Carbon::Parse::RelativeLocation> {
  25. using RelativeLocation = Carbon::Parse::RelativeLocation;
  26. static void format(const RelativeLocation& loc, raw_ostream& out,
  27. StringRef /*style*/) {
  28. switch (loc) {
  29. case RelativeLocation::Around:
  30. out << "around";
  31. break;
  32. case RelativeLocation::After:
  33. out << "after";
  34. break;
  35. case RelativeLocation::Before:
  36. out << "before";
  37. break;
  38. }
  39. }
  40. };
  41. } // namespace llvm
  42. namespace Carbon::Parse {
  43. Context::Context(Tree& tree, Lex::TokenizedBuffer& tokens,
  44. Lex::TokenDiagnosticEmitter& emitter,
  45. llvm::raw_ostream* vlog_stream)
  46. : tree_(&tree),
  47. tokens_(&tokens),
  48. emitter_(&emitter),
  49. vlog_stream_(vlog_stream),
  50. position_(tokens_->tokens().begin()),
  51. end_(tokens_->tokens().end()) {
  52. CARBON_CHECK(position_ != end_) << "Empty TokenizedBuffer";
  53. --end_;
  54. CARBON_CHECK(tokens_->GetKind(*end_) == Lex::TokenKind::FileEnd)
  55. << "TokenizedBuffer should end with FileEnd, ended with "
  56. << tokens_->GetKind(*end_);
  57. }
  58. auto Context::AddLeafNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
  59. -> void {
  60. CheckNodeMatchesLexerToken(kind, tokens_->GetKind(token), has_error);
  61. tree_->node_impls_.push_back(
  62. Tree::NodeImpl(kind, has_error, token, /*subtree_size=*/1));
  63. if (has_error) {
  64. tree_->has_errors_ = true;
  65. }
  66. }
  67. auto Context::AddNode(NodeKind kind, Lex::TokenIndex token, int subtree_start,
  68. bool has_error) -> void {
  69. CheckNodeMatchesLexerToken(kind, tokens_->GetKind(token), has_error);
  70. int subtree_size = tree_->size() - subtree_start + 1;
  71. tree_->node_impls_.push_back(
  72. Tree::NodeImpl(kind, has_error, token, subtree_size));
  73. if (has_error) {
  74. tree_->has_errors_ = true;
  75. }
  76. }
  77. auto Context::ReplacePlaceholderNode(int32_t position, NodeKind kind,
  78. Lex::TokenIndex token, bool has_error)
  79. -> void {
  80. CARBON_CHECK(position >= 0 && position < tree_->size())
  81. << "position: " << position << " size: " << tree_->size();
  82. auto* node_impl = &tree_->node_impls_[position];
  83. CARBON_CHECK(node_impl->subtree_size == 1);
  84. CARBON_CHECK(node_impl->kind == NodeKind::Placeholder);
  85. node_impl->kind = kind;
  86. node_impl->has_error = has_error;
  87. node_impl->token = token;
  88. if (has_error) {
  89. tree_->has_errors_ = true;
  90. }
  91. }
  92. auto Context::ConsumeAndAddOpenParen(Lex::TokenIndex default_token,
  93. NodeKind start_kind)
  94. -> std::optional<Lex::TokenIndex> {
  95. if (auto open_paren = ConsumeIf(Lex::TokenKind::OpenParen)) {
  96. AddLeafNode(start_kind, *open_paren, /*has_error=*/false);
  97. return open_paren;
  98. } else {
  99. CARBON_DIAGNOSTIC(ExpectedParenAfter, Error, "Expected `(` after `{0}`.",
  100. Lex::TokenKind);
  101. emitter_->Emit(*position_, ExpectedParenAfter,
  102. tokens().GetKind(default_token));
  103. AddLeafNode(start_kind, default_token, /*has_error=*/true);
  104. return std::nullopt;
  105. }
  106. }
  107. auto Context::ConsumeAndAddCloseSymbol(Lex::TokenIndex expected_open,
  108. StateStackEntry state,
  109. NodeKind close_kind) -> void {
  110. Lex::TokenKind open_token_kind = tokens().GetKind(expected_open);
  111. if (!open_token_kind.is_opening_symbol()) {
  112. AddNode(close_kind, state.token, state.subtree_start, /*has_error=*/true);
  113. } else if (auto close_token = ConsumeIf(open_token_kind.closing_symbol())) {
  114. AddNode(close_kind, *close_token, state.subtree_start, state.has_error);
  115. } else {
  116. // TODO: Include the location of the matching opening delimiter in the
  117. // diagnostic.
  118. CARBON_DIAGNOSTIC(ExpectedCloseSymbol, Error,
  119. "Unexpected tokens before `{0}`.", llvm::StringLiteral);
  120. emitter_->Emit(*position_, ExpectedCloseSymbol,
  121. open_token_kind.closing_symbol().fixed_spelling());
  122. SkipTo(tokens().GetMatchedClosingToken(expected_open));
  123. AddNode(close_kind, Consume(), state.subtree_start, /*has_error=*/true);
  124. }
  125. }
  126. auto Context::ConsumeAndAddLeafNodeIf(Lex::TokenKind token_kind,
  127. NodeKind node_kind) -> bool {
  128. auto token = ConsumeIf(token_kind);
  129. if (!token) {
  130. return false;
  131. }
  132. AddLeafNode(node_kind, *token);
  133. return true;
  134. }
  135. auto Context::ConsumeChecked(Lex::TokenKind kind) -> Lex::TokenIndex {
  136. CARBON_CHECK(PositionIs(kind))
  137. << "Required " << kind << ", found " << PositionKind();
  138. return Consume();
  139. }
  140. auto Context::ConsumeIf(Lex::TokenKind kind) -> std::optional<Lex::TokenIndex> {
  141. if (!PositionIs(kind)) {
  142. return std::nullopt;
  143. }
  144. return Consume();
  145. }
  146. auto Context::FindNextOf(std::initializer_list<Lex::TokenKind> desired_kinds)
  147. -> std::optional<Lex::TokenIndex> {
  148. auto new_position = position_;
  149. while (true) {
  150. Lex::TokenIndex token = *new_position;
  151. Lex::TokenKind kind = tokens().GetKind(token);
  152. if (kind.IsOneOf(desired_kinds)) {
  153. return token;
  154. }
  155. // Step to the next token at the current bracketing level.
  156. if (kind.is_closing_symbol() || kind == Lex::TokenKind::FileEnd) {
  157. // There are no more tokens at this level.
  158. return std::nullopt;
  159. } else if (kind.is_opening_symbol()) {
  160. new_position = Lex::TokenIterator(tokens().GetMatchedClosingToken(token));
  161. // Advance past the closing token.
  162. ++new_position;
  163. } else {
  164. ++new_position;
  165. }
  166. }
  167. }
  168. auto Context::SkipMatchingGroup() -> bool {
  169. if (!PositionKind().is_opening_symbol()) {
  170. return false;
  171. }
  172. SkipTo(tokens().GetMatchedClosingToken(*position_));
  173. ++position_;
  174. return true;
  175. }
  176. auto Context::SkipPastLikelyEnd(Lex::TokenIndex skip_root)
  177. -> std::optional<Lex::TokenIndex> {
  178. if (position_ == end_) {
  179. return std::nullopt;
  180. }
  181. Lex::LineIndex root_line = tokens().GetLine(skip_root);
  182. int root_line_indent = tokens().GetIndentColumnNumber(root_line);
  183. // We will keep scanning through tokens on the same line as the root or
  184. // lines with greater indentation than root's line.
  185. auto is_same_line_or_indent_greater_than_root = [&](Lex::TokenIndex t) {
  186. Lex::LineIndex l = tokens().GetLine(t);
  187. if (l == root_line) {
  188. return true;
  189. }
  190. return tokens().GetIndentColumnNumber(l) > root_line_indent;
  191. };
  192. do {
  193. if (PositionIs(Lex::TokenKind::CloseCurlyBrace)) {
  194. // Immediately bail out if we hit an unmatched close curly, this will
  195. // pop us up a level of the syntax grouping.
  196. return std::nullopt;
  197. }
  198. // We assume that a semicolon is always intended to be the end of the
  199. // current construct.
  200. if (auto semi = ConsumeIf(Lex::TokenKind::Semi)) {
  201. return semi;
  202. }
  203. // Skip over any matching group of tokens().
  204. if (SkipMatchingGroup()) {
  205. continue;
  206. }
  207. // Otherwise just step forward one token.
  208. ++position_;
  209. } while (position_ != end_ &&
  210. is_same_line_or_indent_greater_than_root(*position_));
  211. return std::nullopt;
  212. }
  213. auto Context::SkipTo(Lex::TokenIndex t) -> void {
  214. CARBON_CHECK(t >= *position_) << "Tried to skip backwards from " << position_
  215. << " to " << Lex::TokenIterator(t);
  216. position_ = Lex::TokenIterator(t);
  217. CARBON_CHECK(position_ != end_) << "Skipped past EOF.";
  218. }
  219. // Determines whether the given token is considered to be the start of an
  220. // operand according to the rules for infix operator parsing.
  221. static auto IsAssumedStartOfOperand(Lex::TokenKind kind) -> bool {
  222. return kind.IsOneOf({Lex::TokenKind::OpenParen, Lex::TokenKind::Identifier,
  223. Lex::TokenKind::IntLiteral, Lex::TokenKind::RealLiteral,
  224. Lex::TokenKind::StringLiteral});
  225. }
  226. // Determines whether the given token is considered to be the end of an
  227. // operand according to the rules for infix operator parsing.
  228. static auto IsAssumedEndOfOperand(Lex::TokenKind kind) -> bool {
  229. return kind.IsOneOf(
  230. {Lex::TokenKind::CloseParen, Lex::TokenKind::CloseCurlyBrace,
  231. Lex::TokenKind::CloseSquareBracket, Lex::TokenKind::Identifier,
  232. Lex::TokenKind::IntLiteral, Lex::TokenKind::RealLiteral,
  233. Lex::TokenKind::StringLiteral});
  234. }
  235. // Determines whether the given token could possibly be the start of an
  236. // operand. This is conservatively correct, and will never incorrectly return
  237. // `false`, but can incorrectly return `true`.
  238. static auto IsPossibleStartOfOperand(Lex::TokenKind kind) -> bool {
  239. return !kind.IsOneOf(
  240. {Lex::TokenKind::CloseParen, Lex::TokenKind::CloseCurlyBrace,
  241. Lex::TokenKind::CloseSquareBracket, Lex::TokenKind::Comma,
  242. Lex::TokenKind::Semi, Lex::TokenKind::Colon});
  243. }
  244. auto Context::IsLexicallyValidInfixOperator() -> bool {
  245. CARBON_CHECK(position_ != end_) << "Expected an operator token.";
  246. bool leading_space = tokens().HasLeadingWhitespace(*position_);
  247. bool trailing_space = tokens().HasTrailingWhitespace(*position_);
  248. // If there's whitespace on both sides, it's an infix operator.
  249. if (leading_space && trailing_space) {
  250. return true;
  251. }
  252. // If there's whitespace on exactly one side, it's not an infix operator.
  253. if (leading_space || trailing_space) {
  254. return false;
  255. }
  256. // Otherwise, for an infix operator, the preceding token must be any close
  257. // bracket, identifier, or literal and the next token must be an open paren,
  258. // identifier, or literal.
  259. if (position_ == tokens().tokens().begin() ||
  260. !IsAssumedEndOfOperand(tokens().GetKind(*(position_ - 1))) ||
  261. !IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  262. return false;
  263. }
  264. return true;
  265. }
  266. auto Context::IsTrailingOperatorInfix() -> bool {
  267. if (position_ == end_) {
  268. return false;
  269. }
  270. // An operator that follows the infix operator rules is parsed as
  271. // infix, unless the next token means that it can't possibly be.
  272. if (IsLexicallyValidInfixOperator() &&
  273. IsPossibleStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  274. return true;
  275. }
  276. // A trailing operator with leading whitespace that's not valid as infix is
  277. // not valid at all. If the next token looks like the start of an operand,
  278. // then parse as infix, otherwise as postfix. Either way we'll produce a
  279. // diagnostic later on.
  280. if (tokens().HasLeadingWhitespace(*position_) &&
  281. IsAssumedStartOfOperand(tokens().GetKind(*(position_ + 1)))) {
  282. return true;
  283. }
  284. return false;
  285. }
  286. auto Context::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  287. if (!PositionKind().is_symbol()) {
  288. // Whitespace-based fixity rules only apply to symbolic operators.
  289. return;
  290. }
  291. if (fixity == OperatorFixity::Infix) {
  292. // Infix operators must satisfy the infix operator rules.
  293. if (!IsLexicallyValidInfixOperator()) {
  294. CARBON_DIAGNOSTIC(BinaryOperatorRequiresWhitespace, Error,
  295. "Whitespace missing {0} binary operator.",
  296. RelativeLocation);
  297. emitter_->Emit(*position_, BinaryOperatorRequiresWhitespace,
  298. tokens().HasLeadingWhitespace(*position_)
  299. ? RelativeLocation::After
  300. : (tokens().HasTrailingWhitespace(*position_)
  301. ? RelativeLocation::Before
  302. : RelativeLocation::Around));
  303. }
  304. } else {
  305. bool prefix = fixity == OperatorFixity::Prefix;
  306. // Whitespace is not permitted between a symbolic pre/postfix operator and
  307. // its operand.
  308. if ((prefix ? tokens().HasTrailingWhitespace(*position_)
  309. : tokens().HasLeadingWhitespace(*position_))) {
  310. CARBON_DIAGNOSTIC(UnaryOperatorHasWhitespace, Error,
  311. "Whitespace is not allowed {0} this unary operator.",
  312. RelativeLocation);
  313. emitter_->Emit(
  314. *position_, UnaryOperatorHasWhitespace,
  315. prefix ? RelativeLocation::After : RelativeLocation::Before);
  316. } else if (IsLexicallyValidInfixOperator()) {
  317. // Pre/postfix operators must not satisfy the infix operator rules.
  318. CARBON_DIAGNOSTIC(UnaryOperatorRequiresWhitespace, Error,
  319. "Whitespace is required {0} this unary operator.",
  320. RelativeLocation);
  321. emitter_->Emit(
  322. *position_, UnaryOperatorRequiresWhitespace,
  323. prefix ? RelativeLocation::Before : RelativeLocation::After);
  324. }
  325. }
  326. }
  327. auto Context::ConsumeListToken(NodeKind comma_kind, Lex::TokenKind close_kind,
  328. bool already_has_error) -> ListTokenKind {
  329. if (!PositionIs(Lex::TokenKind::Comma) && !PositionIs(close_kind)) {
  330. // Don't error a second time on the same element.
  331. if (!already_has_error) {
  332. CARBON_DIAGNOSTIC(UnexpectedTokenAfterListElement, Error,
  333. "Expected `,` or `{0}`.", Lex::TokenKind);
  334. emitter_->Emit(*position_, UnexpectedTokenAfterListElement, close_kind);
  335. ReturnErrorOnState();
  336. }
  337. // Recover from the invalid token.
  338. auto end_of_element = FindNextOf({Lex::TokenKind::Comma, close_kind});
  339. // The lexer guarantees that parentheses are balanced.
  340. CARBON_CHECK(end_of_element)
  341. << "missing matching `" << close_kind.opening_symbol() << "` for `"
  342. << close_kind << "`";
  343. SkipTo(*end_of_element);
  344. }
  345. if (PositionIs(close_kind)) {
  346. return ListTokenKind::Close;
  347. } else {
  348. AddLeafNode(comma_kind, Consume());
  349. return PositionIs(close_kind) ? ListTokenKind::CommaClose
  350. : ListTokenKind::Comma;
  351. }
  352. }
  353. auto Context::RecoverFromDeclError(StateStackEntry state,
  354. NodeKind parse_node_kind,
  355. bool skip_past_likely_end) -> void {
  356. auto token = state.token;
  357. if (skip_past_likely_end) {
  358. if (auto semi = SkipPastLikelyEnd(token)) {
  359. token = *semi;
  360. }
  361. }
  362. AddNode(parse_node_kind, token, state.subtree_start,
  363. /*has_error=*/true);
  364. }
  365. auto Context::EmitExpectedDeclSemi(Lex::TokenKind expected_kind) -> void {
  366. CARBON_DIAGNOSTIC(ExpectedDeclSemi, Error,
  367. "`{0}` declarations must end with a `;`.", Lex::TokenKind);
  368. emitter().Emit(*position(), ExpectedDeclSemi, expected_kind);
  369. }
  370. auto Context::EmitExpectedDeclSemiOrDefinition(Lex::TokenKind expected_kind)
  371. -> void {
  372. CARBON_DIAGNOSTIC(ExpectedDeclSemiOrDefinition, Error,
  373. "`{0}` declarations must either end with a `;` or "
  374. "have a `{{ ... }` block for a definition.",
  375. Lex::TokenKind);
  376. emitter().Emit(*position(), ExpectedDeclSemiOrDefinition, expected_kind);
  377. }
  378. auto Context::PrintForStackDump(llvm::raw_ostream& output) const -> void {
  379. output << "Parser stack:\n";
  380. for (auto [i, entry] : llvm::enumerate(state_stack_)) {
  381. output << "\t" << i << ".\t" << entry.state;
  382. PrintTokenForStackDump(output, entry.token);
  383. }
  384. output << "\tcursor\tposition_";
  385. PrintTokenForStackDump(output, *position_);
  386. }
  387. auto Context::PrintTokenForStackDump(llvm::raw_ostream& output,
  388. Lex::TokenIndex token) const -> void {
  389. output << " @ " << tokens_->GetLineNumber(tokens_->GetLine(token)) << ":"
  390. << tokens_->GetColumnNumber(token) << ": token " << token << " : "
  391. << tokens_->GetKind(token) << "\n";
  392. }
  393. } // namespace Carbon::Parse