parser_impl.cpp 49 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "common/check.h"
  7. #include "llvm/ADT/Optional.h"
  8. #include "llvm/Support/FormatVariadic.h"
  9. #include "llvm/Support/raw_ostream.h"
  10. #include "toolchain/lexer/token_kind.h"
  11. #include "toolchain/lexer/tokenized_buffer.h"
  12. #include "toolchain/parser/parse_node_kind.h"
  13. #include "toolchain/parser/parse_tree.h"
  14. namespace Carbon {
  15. CARBON_DIAGNOSTIC(ExpectedSemiAfterExpression, Error,
  16. "Expected `;` after expression.");
  17. // Manages the parser's stack depth, particularly decrementing on destruction.
  18. // This should only be instantiated through RETURN_IF_STACK_LIMITED.
  19. class ParseTree::Parser::ScopedStackStep {
  20. public:
  21. explicit ScopedStackStep(ParseTree::Parser* parser) : parser_(parser) {
  22. ++parser_->stack_depth_;
  23. }
  24. ~ScopedStackStep() { --parser_->stack_depth_; }
  25. auto VerifyUnderLimit() -> bool {
  26. if (parser_->stack_depth_ >= StackDepthLimit) {
  27. CARBON_DIAGNOSTIC(StackLimitExceeded, Error,
  28. "Exceeded recursion limit ({0})", int);
  29. parser_->emitter_.Emit(*parser_->position_, StackLimitExceeded,
  30. ParseTree::StackDepthLimit);
  31. return false;
  32. }
  33. return true;
  34. }
  35. private:
  36. ParseTree::Parser* parser_;
  37. };
  38. // Encapsulates checking the stack and erroring if needed. This should be called
  39. // at the start of every parse function.
  40. #define CARBON_RETURN_IF_STACK_LIMITED(error_return_expr) \
  41. ScopedStackStep scoped_stack_step(this); \
  42. if (!scoped_stack_step.VerifyUnderLimit()) { \
  43. return (error_return_expr); \
  44. }
  45. // A relative location for characters in errors.
  46. enum class RelativeLocation : int8_t {
  47. Around,
  48. After,
  49. Before,
  50. };
  51. // Adapts RelativeLocation for use with formatv.
  52. static auto operator<<(llvm::raw_ostream& out, RelativeLocation loc)
  53. -> llvm::raw_ostream& {
  54. switch (loc) {
  55. case RelativeLocation::Around:
  56. out << "around";
  57. break;
  58. case RelativeLocation::After:
  59. out << "after";
  60. break;
  61. case RelativeLocation::Before:
  62. out << "before";
  63. break;
  64. }
  65. return out;
  66. }
  67. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  68. TokenDiagnosticEmitter& emitter)
  69. : tree_(tree_arg),
  70. tokens_(tokens_arg),
  71. emitter_(emitter),
  72. position_(tokens_.tokens().begin()),
  73. end_(tokens_.tokens().end()) {
  74. CARBON_CHECK(std::find_if(position_, end_,
  75. [&](TokenizedBuffer::Token t) {
  76. return tokens_.GetKind(t) ==
  77. TokenKind::EndOfFile();
  78. }) != end_)
  79. << "No EndOfFileToken in token buffer.";
  80. }
  81. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  82. TokenDiagnosticEmitter& emitter) -> ParseTree {
  83. ParseTree tree(tokens);
  84. // Reserve the space we expect to need for nodes in order to avoid allocation
  85. // and copying overhead. This should be a one-to-one correspondence in an
  86. // error-free tree.
  87. tree.node_impls_.reserve(tokens.size());
  88. Parser parser(tree, tokens, emitter);
  89. while (!parser.AtEndOfFile()) {
  90. if (!parser.ParseDeclaration()) {
  91. // We don't have an enclosing parse tree node to mark as erroneous, so
  92. // just mark the tree as a whole.
  93. tree.has_errors_ = true;
  94. }
  95. }
  96. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position_);
  97. CARBON_CHECK(tree.has_errors() || tree.size() == tokens.size())
  98. << "Failed to correctly calculate size: expected " << tokens.size()
  99. << ", got " << tree.size();
  100. CARBON_CHECK(tree.Verify()) << "Parse tree built but does not verify!";
  101. return tree;
  102. }
  103. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  104. CARBON_CHECK(kind != TokenKind::EndOfFile())
  105. << "Cannot consume the EOF token!";
  106. CARBON_CHECK(NextTokenIs(kind)) << "The current token is the wrong kind!";
  107. TokenizedBuffer::Token t = *position_;
  108. ++position_;
  109. CARBON_CHECK(position_ != end_)
  110. << "Reached end of tokens without finding EOF token.";
  111. return t;
  112. }
  113. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  114. -> llvm::Optional<TokenizedBuffer::Token> {
  115. if (!NextTokenIs(kind)) {
  116. return {};
  117. }
  118. return Consume(kind);
  119. }
  120. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  121. TokenizedBuffer::Token token,
  122. bool has_error) -> Node {
  123. Node n(tree_.node_impls_.size());
  124. tree_.node_impls_.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  125. if (has_error) {
  126. MarkNodeError(n);
  127. }
  128. return n;
  129. }
  130. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  131. ParseNodeKind n_kind)
  132. -> llvm::Optional<Node> {
  133. auto t = ConsumeIf(t_kind);
  134. if (!t) {
  135. return {};
  136. }
  137. return AddLeafNode(n_kind, *t);
  138. }
  139. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  140. tree_.node_impls_[n.index_].has_error = true;
  141. tree_.has_errors_ = true;
  142. }
  143. // A marker for the start of a node's subtree.
  144. //
  145. // This is used to track the size of the node's subtree. It can be used
  146. // repeatedly if multiple subtrees start at the same position.
  147. struct ParseTree::Parser::SubtreeStart {
  148. int tree_size;
  149. };
  150. auto ParseTree::Parser::GetSubtreeStartPosition() -> SubtreeStart {
  151. return {static_cast<int>(tree_.node_impls_.size())};
  152. }
  153. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  154. SubtreeStart start, bool has_error) -> Node {
  155. // The size of the subtree is the change in size from when we started this
  156. // subtree to now, but including the node we're about to add.
  157. int tree_stop_size = static_cast<int>(tree_.node_impls_.size()) + 1;
  158. int subtree_size = tree_stop_size - start.tree_size;
  159. Node n(tree_.node_impls_.size());
  160. tree_.node_impls_.push_back(NodeImpl(n_kind, t, subtree_size));
  161. if (has_error) {
  162. MarkNodeError(n);
  163. }
  164. return n;
  165. }
  166. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  167. TokenizedBuffer::Token t = *position_;
  168. TokenKind t_kind = tokens_.GetKind(t);
  169. if (!t_kind.IsOpeningSymbol()) {
  170. return false;
  171. }
  172. SkipTo(tokens_.GetMatchedClosingToken(t));
  173. Consume(t_kind.GetClosingSymbol());
  174. return true;
  175. }
  176. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  177. CARBON_CHECK(t >= *position_) << "Tried to skip backwards.";
  178. position_ = TokenizedBuffer::TokenIterator(t);
  179. CARBON_CHECK(position_ != end_) << "Skipped past EOF.";
  180. }
  181. auto ParseTree::Parser::FindNextOf(
  182. std::initializer_list<TokenKind> desired_kinds)
  183. -> llvm::Optional<TokenizedBuffer::Token> {
  184. auto new_position = position_;
  185. while (true) {
  186. TokenizedBuffer::Token token = *new_position;
  187. TokenKind kind = tokens_.GetKind(token);
  188. if (kind.IsOneOf(desired_kinds)) {
  189. return token;
  190. }
  191. // Step to the next token at the current bracketing level.
  192. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  193. // There are no more tokens at this level.
  194. return llvm::None;
  195. } else if (kind.IsOpeningSymbol()) {
  196. new_position =
  197. TokenizedBuffer::TokenIterator(tokens_.GetMatchedClosingToken(token));
  198. // Advance past the closing token.
  199. ++new_position;
  200. } else {
  201. ++new_position;
  202. }
  203. }
  204. }
  205. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root)
  206. -> llvm::Optional<TokenizedBuffer::Token> {
  207. if (AtEndOfFile()) {
  208. return llvm::None;
  209. }
  210. TokenizedBuffer::Line root_line = tokens_.GetLine(skip_root);
  211. int root_line_indent = tokens_.GetIndentColumnNumber(root_line);
  212. // We will keep scanning through tokens on the same line as the root or
  213. // lines with greater indentation than root's line.
  214. auto is_same_line_or_indent_greater_than_root =
  215. [&](TokenizedBuffer::Token t) {
  216. TokenizedBuffer::Line l = tokens_.GetLine(t);
  217. if (l == root_line) {
  218. return true;
  219. }
  220. return tokens_.GetIndentColumnNumber(l) > root_line_indent;
  221. };
  222. do {
  223. if (NextTokenKind() == TokenKind::CloseCurlyBrace()) {
  224. // Immediately bail out if we hit an unmatched close curly, this will
  225. // pop us up a level of the syntax grouping.
  226. return llvm::None;
  227. }
  228. // We assume that a semicolon is always intended to be the end of the
  229. // current construct.
  230. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  231. return semi;
  232. }
  233. // Skip over any matching group of tokens_.
  234. if (SkipMatchingGroup()) {
  235. continue;
  236. }
  237. // Otherwise just step forward one token.
  238. Consume(NextTokenKind());
  239. } while (!AtEndOfFile() &&
  240. is_same_line_or_indent_greater_than_root(*position_));
  241. return llvm::None;
  242. }
  243. auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
  244. ParseNodeKind kind)
  245. -> llvm::Optional<Node> {
  246. if (auto close_paren =
  247. ConsumeAndAddLeafNodeIf(TokenKind::CloseParen(), kind)) {
  248. return close_paren;
  249. }
  250. // TODO: Include the location of the matching open_paren in the diagnostic.
  251. CARBON_DIAGNOSTIC(ExpectedCloseParen, Error, "Unexpected tokens before `)`.");
  252. emitter_.Emit(*position_, ExpectedCloseParen);
  253. SkipTo(tokens_.GetMatchedClosingToken(open_paren));
  254. AddLeafNode(kind, Consume(TokenKind::CloseParen()));
  255. return llvm::None;
  256. }
  257. template <typename ListElementParser, typename ListCompletionHandler>
  258. auto ParseTree::Parser::ParseList(TokenKind open, TokenKind close,
  259. ListElementParser list_element_parser,
  260. ParseNodeKind comma_kind,
  261. ListCompletionHandler list_handler,
  262. bool allow_trailing_comma)
  263. -> llvm::Optional<Node> {
  264. // `(` element-list[opt] `)`
  265. //
  266. // element-list ::= element
  267. // ::= element `,` element-list
  268. TokenizedBuffer::Token open_paren = Consume(open);
  269. bool has_errors = false;
  270. bool any_commas = false;
  271. int64_t num_elements = 0;
  272. // Parse elements, if any are specified.
  273. if (!NextTokenIs(close)) {
  274. while (true) {
  275. bool element_error = !list_element_parser();
  276. has_errors |= element_error;
  277. ++num_elements;
  278. if (!NextTokenIsOneOf({close, TokenKind::Comma()})) {
  279. if (!element_error) {
  280. CARBON_DIAGNOSTIC(UnexpectedTokenAfterListElement, Error,
  281. "Expected `,` or `{0}`.", TokenKind);
  282. emitter_.Emit(*position_, UnexpectedTokenAfterListElement, close);
  283. }
  284. has_errors = true;
  285. auto end_of_element = FindNextOf({TokenKind::Comma(), close});
  286. // The lexer guarantees that parentheses are balanced.
  287. CARBON_CHECK(end_of_element) << "missing matching `)` for `(`";
  288. SkipTo(*end_of_element);
  289. }
  290. if (NextTokenIs(close)) {
  291. break;
  292. }
  293. AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
  294. any_commas = true;
  295. if (allow_trailing_comma && NextTokenIs(close)) {
  296. break;
  297. }
  298. }
  299. }
  300. bool is_single_item = num_elements == 1 && !any_commas;
  301. return list_handler(open_paren, is_single_item, Consume(close), has_errors);
  302. }
  303. auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
  304. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  305. if (NextTokenIs(TokenKind::Identifier()) &&
  306. tokens_.GetKind(*(position_ + 1)) == TokenKind::Colon()) {
  307. // identifier `:` type
  308. auto start = GetSubtreeStartPosition();
  309. AddLeafNode(ParseNodeKind::DeclaredName(),
  310. Consume(TokenKind::Identifier()));
  311. auto colon = Consume(TokenKind::Colon());
  312. auto type = ParseType();
  313. return AddNode(ParseNodeKind::PatternBinding(), colon, start,
  314. /*has_error=*/!type);
  315. }
  316. switch (kind) {
  317. case PatternKind::Parameter:
  318. CARBON_DIAGNOSTIC(ExpectedParameterName, Error,
  319. "Expected parameter declaration.");
  320. emitter_.Emit(*position_, ExpectedParameterName);
  321. break;
  322. case PatternKind::Variable:
  323. CARBON_DIAGNOSTIC(ExpectedVariableName, Error,
  324. "Expected pattern in `var` declaration.");
  325. emitter_.Emit(*position_, ExpectedVariableName);
  326. break;
  327. }
  328. return llvm::None;
  329. }
  330. auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
  331. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  332. return ParsePattern(PatternKind::Parameter);
  333. }
  334. auto ParseTree::Parser::ParseFunctionSignature() -> bool {
  335. CARBON_RETURN_IF_STACK_LIMITED(false);
  336. auto start = GetSubtreeStartPosition();
  337. auto params = ParseParenList(
  338. [&] { return ParseFunctionParameter(); },
  339. ParseNodeKind::ParameterListComma(),
  340. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  341. TokenizedBuffer::Token close_paren, bool has_errors) {
  342. AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren);
  343. return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
  344. has_errors);
  345. });
  346. auto start_return_type = GetSubtreeStartPosition();
  347. if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) {
  348. auto return_type = ParseType();
  349. AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type,
  350. /*has_error=*/!return_type);
  351. if (!return_type) {
  352. return false;
  353. }
  354. }
  355. return params.has_value();
  356. }
  357. auto ParseTree::Parser::ParseCodeBlock() -> llvm::Optional<Node> {
  358. return ParseCodeBlock(GetSubtreeStartPosition(),
  359. ParseNodeKind::CodeBlockStart(),
  360. ParseNodeKind::CodeBlock());
  361. }
  362. auto ParseTree::Parser::ParseCodeBlock(SubtreeStart subtree_start,
  363. ParseNodeKind start_kind,
  364. ParseNodeKind end_kind)
  365. -> llvm::Optional<Node> {
  366. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  367. llvm::Optional<TokenizedBuffer::Token> open_curly =
  368. ConsumeIf(TokenKind::OpenCurlyBrace());
  369. if (!open_curly) {
  370. // Recover by parsing a single statement.
  371. CARBON_DIAGNOSTIC(ExpectedCodeBlock, Error, "Expected braced code block.");
  372. emitter_.Emit(*position_, ExpectedCodeBlock);
  373. // Use the unexpected token for the block start and end.
  374. TokenizedBuffer::Token recovery_start = *position_;
  375. AddNode(start_kind, recovery_start, subtree_start, /*has_error=*/true);
  376. ParseStatement();
  377. return AddNode(end_kind, recovery_start, subtree_start, /*has_error=*/true);
  378. }
  379. AddNode(start_kind, *open_curly, subtree_start);
  380. // Loop over all the different possibly nested elements in the code block.
  381. bool has_error = false;
  382. while (!NextTokenIs(TokenKind::CloseCurlyBrace())) {
  383. if (!ParseStatement()) {
  384. // We detected and diagnosed an error of some kind. We can trivially
  385. // skip to the actual close curly brace from here.
  386. // TODO: It would be better to skip to the next semicolon, or the next
  387. // token at the start of a line with the same indent as this one.
  388. SkipTo(tokens_.GetMatchedClosingToken(*open_curly));
  389. has_error = true;
  390. break;
  391. }
  392. }
  393. // We always reach here having set our position in the token stream to the
  394. // close curly brace.
  395. return AddNode(end_kind, Consume(TokenKind::CloseCurlyBrace()), subtree_start,
  396. /*has_error=*/has_error);
  397. }
  398. auto ParseTree::Parser::ParsePackageDirective() -> Node {
  399. TokenizedBuffer::Token package_intro_token = Consume(TokenKind::Package());
  400. auto package_start = GetSubtreeStartPosition();
  401. auto create_error_node = [&]() {
  402. return AddNode(ParseNodeKind::PackageDirective(), package_intro_token,
  403. package_start,
  404. /*has_error=*/true);
  405. };
  406. CARBON_RETURN_IF_STACK_LIMITED(create_error_node());
  407. auto exit_on_parse_error = [&]() {
  408. if (auto semi_token = SkipPastLikelyEnd(package_intro_token)) {
  409. AddLeafNode(ParseNodeKind::PackageEnd(), *semi_token);
  410. }
  411. return create_error_node();
  412. };
  413. if (!NextTokenIs(TokenKind::Identifier())) {
  414. CARBON_DIAGNOSTIC(ExpectedIdentifierAfterPackage, Error,
  415. "Expected identifier after `package`.");
  416. emitter_.Emit(*position_, ExpectedIdentifierAfterPackage);
  417. return exit_on_parse_error();
  418. }
  419. AddLeafNode(ParseNodeKind::DeclaredName(), Consume(TokenKind::Identifier()));
  420. bool library_parsed = false;
  421. if (tokens_.GetKind(*(position_)) == TokenKind::Library()) {
  422. auto library_start = GetSubtreeStartPosition();
  423. auto library_decl_token = Consume(TokenKind::Library());
  424. if (tokens_.GetKind(*(position_)) != TokenKind::StringLiteral()) {
  425. CARBON_DIAGNOSTIC(
  426. ExpectedLibraryName, Error,
  427. "Expected a string literal to specify the library name.");
  428. emitter_.Emit(*position_, ExpectedLibraryName);
  429. return exit_on_parse_error();
  430. }
  431. AddLeafNode(ParseNodeKind::Literal(), Consume(TokenKind::StringLiteral()));
  432. AddNode(ParseNodeKind::PackageLibrary(), library_decl_token, library_start,
  433. /*has_error=*/false);
  434. library_parsed = true;
  435. }
  436. auto api_or_impl_token = tokens_.GetKind(*(position_));
  437. if (api_or_impl_token == TokenKind::Api()) {
  438. AddLeafNode(ParseNodeKind::PackageApi(), Consume(TokenKind::Api()));
  439. } else if (api_or_impl_token == TokenKind::Impl()) {
  440. AddLeafNode(ParseNodeKind::PackageImpl(), Consume(TokenKind::Impl()));
  441. } else if (!library_parsed &&
  442. api_or_impl_token == TokenKind::StringLiteral()) {
  443. // If we come acroess a string literal and we didn't parse `library "..."`
  444. // yet, then most probably the user forgot to add `library` before the
  445. // library name.
  446. CARBON_DIAGNOSTIC(MissingLibraryKeyword, Error,
  447. "Missing `library` keyword.");
  448. emitter_.Emit(*position_, MissingLibraryKeyword);
  449. return exit_on_parse_error();
  450. } else {
  451. CARBON_DIAGNOSTIC(ExpectedApiOrImpl, Error, "Expected a `api` or `impl`.");
  452. emitter_.Emit(*position_, ExpectedApiOrImpl);
  453. return exit_on_parse_error();
  454. }
  455. if (tokens_.GetKind(*(position_)) != TokenKind::Semi()) {
  456. CARBON_DIAGNOSTIC(ExpectedSemiToEndPackageDirective, Error,
  457. "Expected `;` to end package directive.");
  458. emitter_.Emit(*position_, ExpectedSemiToEndPackageDirective);
  459. return exit_on_parse_error();
  460. }
  461. AddLeafNode(ParseNodeKind::PackageEnd(), Consume(TokenKind::Semi()));
  462. return AddNode(ParseNodeKind::PackageDirective(), package_intro_token,
  463. package_start, /*has_error=*/false);
  464. }
  465. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  466. auto start = GetSubtreeStartPosition();
  467. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::Fn());
  468. AddLeafNode(ParseNodeKind::FunctionIntroducer(), function_intro_token);
  469. // When handling errors before the start of the definition, treat it as a
  470. // declaration. Recover to a semicolon when it makes sense as a possible
  471. // function end, otherwise use the fn token for the error.
  472. auto add_error_function_node = [&](bool skip_past_likely_end) {
  473. if (skip_past_likely_end) {
  474. if (auto semi_token = SkipPastLikelyEnd(function_intro_token)) {
  475. return AddNode(ParseNodeKind::FunctionDeclaration(), *semi_token, start,
  476. /*has_error=*/true);
  477. }
  478. }
  479. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  480. start, /*has_error=*/true);
  481. };
  482. CARBON_RETURN_IF_STACK_LIMITED(add_error_function_node(false));
  483. if (!ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  484. ParseNodeKind::DeclaredName())) {
  485. CARBON_DIAGNOSTIC(ExpectedFunctionName, Error,
  486. "Expected function name after `fn` keyword.");
  487. emitter_.Emit(*position_, ExpectedFunctionName);
  488. // TODO: We could change the lexer to allow us to synthesize certain
  489. // kinds of tokens and try to "recover" here, but unclear that this is
  490. // really useful.
  491. return add_error_function_node(true);
  492. }
  493. TokenizedBuffer::Token open_paren = *position_;
  494. if (tokens_.GetKind(open_paren) != TokenKind::OpenParen()) {
  495. CARBON_DIAGNOSTIC(ExpectedFunctionParams, Error,
  496. "Expected `(` after function name.");
  497. emitter_.Emit(open_paren, ExpectedFunctionParams);
  498. return add_error_function_node(true);
  499. }
  500. TokenizedBuffer::Token close_paren =
  501. tokens_.GetMatchedClosingToken(open_paren);
  502. if (!ParseFunctionSignature()) {
  503. return add_error_function_node(true);
  504. }
  505. switch (NextTokenKind()) {
  506. case TokenKind::OpenCurlyBrace(): {
  507. // Parse a definition which is represented as a code block.
  508. if (auto node =
  509. ParseCodeBlock(start, ParseNodeKind::FunctionDefinitionStart(),
  510. ParseNodeKind::FunctionDefinition())) {
  511. return *node;
  512. }
  513. return add_error_function_node(false);
  514. }
  515. case TokenKind::Semi(): {
  516. return AddNode(ParseNodeKind::FunctionDeclaration(),
  517. Consume(TokenKind::Semi()), start);
  518. }
  519. default: {
  520. CARBON_DIAGNOSTIC(
  521. ExpectedFunctionBodyOrSemi, Error,
  522. "Expected function definition or `;` after function declaration.");
  523. emitter_.Emit(*position_, ExpectedFunctionBodyOrSemi);
  524. // Only need to skip if we've not already found a new line.
  525. return add_error_function_node(tokens_.GetLine(*position_) ==
  526. tokens_.GetLine(close_paren));
  527. }
  528. }
  529. }
  530. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  531. // `var` pattern [= expression] `;`
  532. TokenizedBuffer::Token var_token = Consume(TokenKind::Var());
  533. auto start = GetSubtreeStartPosition();
  534. CARBON_RETURN_IF_STACK_LIMITED(AddNode(ParseNodeKind::VariableDeclaration(),
  535. var_token, start,
  536. /*has_error=*/true));
  537. auto pattern = ParsePattern(PatternKind::Variable);
  538. if (!pattern) {
  539. if (auto after_pattern =
  540. FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  541. SkipTo(*after_pattern);
  542. }
  543. }
  544. auto start_init = GetSubtreeStartPosition();
  545. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  546. auto init = ParseExpression();
  547. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  548. /*has_error=*/!init);
  549. }
  550. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  551. ParseNodeKind::DeclarationEnd());
  552. if (!semi) {
  553. emitter_.Emit(*position_, ExpectedSemiAfterExpression);
  554. if (auto semi_token = SkipPastLikelyEnd(var_token)) {
  555. semi = AddLeafNode(ParseNodeKind::DeclarationEnd(), *semi_token,
  556. /*has_error=*/true);
  557. }
  558. }
  559. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  560. /*has_error=*/!pattern || !semi);
  561. }
  562. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  563. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  564. Consume(TokenKind::Semi()));
  565. }
  566. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  567. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  568. switch (NextTokenKind()) {
  569. case TokenKind::Package():
  570. return ParsePackageDirective();
  571. case TokenKind::Fn():
  572. return ParseFunctionDeclaration();
  573. case TokenKind::Var():
  574. return ParseVariableDeclaration();
  575. case TokenKind::Semi():
  576. return ParseEmptyDeclaration();
  577. case TokenKind::EndOfFile():
  578. return llvm::None;
  579. default:
  580. // Errors are handled outside the switch.
  581. break;
  582. }
  583. // Should happen for packages now.
  584. // We didn't recognize an introducer for a valid declaration.
  585. CARBON_DIAGNOSTIC(UnrecognizedDeclaration, Error,
  586. "Unrecognized declaration introducer.");
  587. emitter_.Emit(*position_, UnrecognizedDeclaration);
  588. // Skip forward past any end of a declaration we simply didn't understand so
  589. // that we can find the start of the next declaration or the end of a scope.
  590. if (auto semi_token = SkipPastLikelyEnd(*position_)) {
  591. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), *semi_token,
  592. /*has_error=*/true);
  593. }
  594. // Nothing, not even a semicolon found.
  595. return llvm::None;
  596. }
  597. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  598. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  599. // parenthesized-expression ::= `(` expression `)`
  600. // tuple-literal ::= `(` `)`
  601. // ::= `(` expression `,` [expression-list [`,`]] `)`
  602. //
  603. // Parse the union of these, `(` [expression-list [`,`]] `)`, and work out
  604. // whether it's a tuple or a parenthesized expression afterwards.
  605. auto start = GetSubtreeStartPosition();
  606. return ParseParenList(
  607. [&] { return ParseExpression(); }, ParseNodeKind::TupleLiteralComma(),
  608. [&](TokenizedBuffer::Token open_paren, bool is_single_item,
  609. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  610. AddLeafNode(is_single_item ? ParseNodeKind::ParenExpressionEnd()
  611. : ParseNodeKind::TupleLiteralEnd(),
  612. close_paren);
  613. return AddNode(is_single_item ? ParseNodeKind::ParenExpression()
  614. : ParseNodeKind::TupleLiteral(),
  615. open_paren, start, has_arg_errors);
  616. },
  617. /*allow_trailing_comma=*/true);
  618. }
  619. auto ParseTree::Parser::ParseBraceExpression() -> llvm::Optional<Node> {
  620. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  621. // braced-expression ::= `{` [field-value-list] `}`
  622. // ::= `{` field-type-list `}`
  623. // field-value-list ::= field-value [`,`]
  624. // ::= field-value `,` field-value-list
  625. // field-value ::= `.` identifier `=` expression
  626. // field-type-list ::= field-type [`,`]
  627. // ::= field-type `,` field-type-list
  628. // field-type ::= `.` identifier `:` type
  629. //
  630. // Note that `{` `}` is the first form (an empty struct), but that an empty
  631. // struct value also behaves as an empty struct type.
  632. auto start = GetSubtreeStartPosition();
  633. enum Kind { Unknown, Value, Type };
  634. Kind kind = Unknown;
  635. return ParseList(
  636. TokenKind::OpenCurlyBrace(), TokenKind::CloseCurlyBrace(),
  637. [&]() -> llvm::Optional<Node> {
  638. auto start_elem = GetSubtreeStartPosition();
  639. auto diagnose_invalid_syntax = [&] {
  640. CARBON_DIAGNOSTIC(ExpectedStructLiteralField, Error,
  641. "Expected {0}{1}{2}.", llvm::StringRef,
  642. llvm::StringRef, llvm::StringRef);
  643. bool can_be_type = kind != Value;
  644. bool can_be_value = kind != Type;
  645. emitter_.Emit(*position_, ExpectedStructLiteralField,
  646. can_be_type ? "`.field: type`" : "",
  647. (can_be_type && can_be_value) ? " or " : "",
  648. can_be_value ? "`.field = value`" : "");
  649. return llvm::None;
  650. };
  651. if (!NextTokenIs(TokenKind::Period())) {
  652. return diagnose_invalid_syntax();
  653. }
  654. auto designator = ParseDesignatorExpression(
  655. start_elem, ParseNodeKind::StructFieldDesignator(),
  656. /*has_errors=*/false);
  657. if (!designator) {
  658. auto recovery_pos = FindNextOf(
  659. {TokenKind::Equal(), TokenKind::Colon(), TokenKind::Comma()});
  660. if (!recovery_pos ||
  661. tokens_.GetKind(*recovery_pos) == TokenKind::Comma()) {
  662. return llvm::None;
  663. }
  664. SkipTo(*recovery_pos);
  665. }
  666. // Work out the kind of this element
  667. Kind elem_kind = (NextTokenIs(TokenKind::Equal()) ? Value
  668. : NextTokenIs(TokenKind::Colon()) ? Type
  669. : Unknown);
  670. if (elem_kind == Unknown || (kind != Unknown && elem_kind != kind)) {
  671. return diagnose_invalid_syntax();
  672. }
  673. kind = elem_kind;
  674. // Struct type fields and value fields use the same grammar except
  675. // that one has a `:` separator and the other has an `=` separator.
  676. auto equal_or_colon_token =
  677. Consume(kind == Type ? TokenKind::Colon() : TokenKind::Equal());
  678. auto type_or_value = ParseExpression();
  679. return AddNode(kind == Type ? ParseNodeKind::StructFieldType()
  680. : ParseNodeKind::StructFieldValue(),
  681. equal_or_colon_token, start_elem,
  682. /*has_error=*/!designator || !type_or_value);
  683. },
  684. ParseNodeKind::StructComma(),
  685. [&](TokenizedBuffer::Token open_brace, bool /*is_single_item*/,
  686. TokenizedBuffer::Token close_brace, bool has_errors) {
  687. AddLeafNode(ParseNodeKind::StructEnd(), close_brace);
  688. return AddNode(kind == Type ? ParseNodeKind::StructTypeLiteral()
  689. : ParseNodeKind::StructLiteral(),
  690. open_brace, start, has_errors);
  691. },
  692. /*allow_trailing_comma=*/true);
  693. }
  694. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  695. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  696. llvm::Optional<ParseNodeKind> kind;
  697. switch (NextTokenKind()) {
  698. case TokenKind::Identifier():
  699. kind = ParseNodeKind::NameReference();
  700. break;
  701. case TokenKind::IntegerLiteral():
  702. case TokenKind::RealLiteral():
  703. case TokenKind::StringLiteral():
  704. case TokenKind::IntegerTypeLiteral():
  705. case TokenKind::UnsignedIntegerTypeLiteral():
  706. case TokenKind::FloatingPointTypeLiteral():
  707. kind = ParseNodeKind::Literal();
  708. break;
  709. case TokenKind::OpenParen():
  710. return ParseParenExpression();
  711. case TokenKind::OpenCurlyBrace():
  712. return ParseBraceExpression();
  713. default:
  714. CARBON_DIAGNOSTIC(ExpectedExpression, Error, "Expected expression.");
  715. emitter_.Emit(*position_, ExpectedExpression);
  716. return llvm::None;
  717. }
  718. return AddLeafNode(*kind, Consume(NextTokenKind()));
  719. }
  720. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  721. ParseNodeKind kind,
  722. bool has_errors)
  723. -> llvm::Optional<Node> {
  724. // `.` identifier
  725. auto dot = Consume(TokenKind::Period());
  726. auto name = ConsumeIf(TokenKind::Identifier());
  727. if (name) {
  728. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  729. } else {
  730. CARBON_DIAGNOSTIC(ExpectedIdentifierAfterDot, Error,
  731. "Expected identifier after `.`.");
  732. emitter_.Emit(*position_, ExpectedIdentifierAfterDot);
  733. // If we see a keyword, assume it was intended to be the designated name.
  734. // TODO: Should keywords be valid in designators?
  735. if (NextTokenKind().IsKeyword()) {
  736. name = Consume(NextTokenKind());
  737. auto name_node = AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  738. MarkNodeError(name_node);
  739. } else {
  740. has_errors = true;
  741. }
  742. }
  743. Node result = AddNode(kind, dot, start, has_errors);
  744. return name ? result : llvm::Optional<Node>();
  745. }
  746. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  747. -> llvm::Optional<Node> {
  748. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  749. // `(` expression-list[opt] `)`
  750. //
  751. // expression-list ::= expression
  752. // ::= expression `,` expression-list
  753. return ParseParenList(
  754. [&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(),
  755. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  756. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  757. AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren);
  758. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  759. has_errors || has_arg_errors);
  760. });
  761. }
  762. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  763. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  764. auto start = GetSubtreeStartPosition();
  765. llvm::Optional<Node> expression = ParsePrimaryExpression();
  766. TokenizedBuffer::TokenIterator last_position = position_;
  767. while (true) {
  768. switch (NextTokenKind()) {
  769. case TokenKind::Period():
  770. expression = ParseDesignatorExpression(
  771. start, ParseNodeKind::DesignatorExpression(), !expression);
  772. break;
  773. case TokenKind::OpenParen():
  774. expression = ParseCallExpression(start, !expression);
  775. break;
  776. default:
  777. return expression;
  778. }
  779. // This is subject to an infinite loop if a child call fails, so monitor
  780. // for stalling.
  781. if (last_position == position_) {
  782. CARBON_CHECK(expression == llvm::None);
  783. return expression;
  784. }
  785. last_position = position_;
  786. }
  787. }
  788. // Determines whether the given token is considered to be the start of an
  789. // operand according to the rules for infix operator parsing.
  790. static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
  791. return kind.IsOneOf({TokenKind::OpenParen(), TokenKind::Identifier(),
  792. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  793. TokenKind::StringLiteral()});
  794. }
  795. // Determines whether the given token is considered to be the end of an
  796. // operand according to the rules for infix operator parsing.
  797. static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
  798. return kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  799. TokenKind::CloseSquareBracket(), TokenKind::Identifier(),
  800. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  801. TokenKind::StringLiteral()});
  802. }
  803. // Determines whether the given token could possibly be the start of an
  804. // operand. This is conservatively correct, and will never incorrectly return
  805. // `false`, but can incorrectly return `true`.
  806. static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
  807. return !kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  808. TokenKind::CloseSquareBracket(), TokenKind::Comma(),
  809. TokenKind::Semi(), TokenKind::Colon()});
  810. }
  811. auto ParseTree::Parser::IsLexicallyValidInfixOperator() -> bool {
  812. CARBON_CHECK(!AtEndOfFile()) << "Expected an operator token.";
  813. bool leading_space = tokens_.HasLeadingWhitespace(*position_);
  814. bool trailing_space = tokens_.HasTrailingWhitespace(*position_);
  815. // If there's whitespace on both sides, it's an infix operator.
  816. if (leading_space && trailing_space) {
  817. return true;
  818. }
  819. // If there's whitespace on exactly one side, it's not an infix operator.
  820. if (leading_space || trailing_space) {
  821. return false;
  822. }
  823. // Otherwise, for an infix operator, the preceding token must be any close
  824. // bracket, identifier, or literal and the next token must be an open paren,
  825. // identifier, or literal.
  826. if (position_ == tokens_.tokens().begin() ||
  827. !IsAssumedEndOfOperand(tokens_.GetKind(*(position_ - 1))) ||
  828. !IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  829. return false;
  830. }
  831. return true;
  832. }
  833. auto ParseTree::Parser::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  834. bool is_valid_as_infix = IsLexicallyValidInfixOperator();
  835. if (fixity == OperatorFixity::Infix) {
  836. // Infix operators must satisfy the infix operator rules.
  837. if (!is_valid_as_infix) {
  838. CARBON_DIAGNOSTIC(BinaryOperatorRequiresWhitespace, Error,
  839. "Whitespace missing {0} binary operator.",
  840. RelativeLocation);
  841. emitter_.Emit(*position_, BinaryOperatorRequiresWhitespace,
  842. tokens_.HasLeadingWhitespace(*position_)
  843. ? RelativeLocation::After
  844. : (tokens_.HasTrailingWhitespace(*position_)
  845. ? RelativeLocation::Before
  846. : RelativeLocation::Around));
  847. }
  848. } else {
  849. bool prefix = fixity == OperatorFixity::Prefix;
  850. // Whitespace is not permitted between a symbolic pre/postfix operator and
  851. // its operand.
  852. if (NextTokenKind().IsSymbol() &&
  853. (prefix ? tokens_.HasTrailingWhitespace(*position_)
  854. : tokens_.HasLeadingWhitespace(*position_))) {
  855. CARBON_DIAGNOSTIC(UnaryOperatorHasWhitespace, Error,
  856. "Whitespace is not allowed {0} this unary operator.",
  857. RelativeLocation);
  858. emitter_.Emit(
  859. *position_, UnaryOperatorHasWhitespace,
  860. prefix ? RelativeLocation::After : RelativeLocation::Before);
  861. }
  862. // Pre/postfix operators must not satisfy the infix operator rules.
  863. if (is_valid_as_infix) {
  864. CARBON_DIAGNOSTIC(UnaryOperatorRequiresWhitespace, Error,
  865. "Whitespace is required {0} this unary operator.",
  866. RelativeLocation);
  867. emitter_.Emit(
  868. *position_, UnaryOperatorRequiresWhitespace,
  869. prefix ? RelativeLocation::Before : RelativeLocation::After);
  870. }
  871. }
  872. }
  873. auto ParseTree::Parser::IsTrailingOperatorInfix() -> bool {
  874. if (AtEndOfFile()) {
  875. return false;
  876. }
  877. // An operator that follows the infix operator rules is parsed as
  878. // infix, unless the next token means that it can't possibly be.
  879. if (IsLexicallyValidInfixOperator() &&
  880. IsPossibleStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  881. return true;
  882. }
  883. // A trailing operator with leading whitespace that's not valid as infix is
  884. // not valid at all. If the next token looks like the start of an operand,
  885. // then parse as infix, otherwise as postfix. Either way we'll produce a
  886. // diagnostic later on.
  887. if (tokens_.HasLeadingWhitespace(*position_) &&
  888. IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  889. return true;
  890. }
  891. return false;
  892. }
  893. auto ParseTree::Parser::ParseOperatorExpression(
  894. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  895. // May be omitted a couple different ways here.
  896. CARBON_DIAGNOSTIC(
  897. OperatorRequiresParentheses, Error,
  898. "Parentheses are required to disambiguate operator precedence.");
  899. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  900. auto start = GetSubtreeStartPosition();
  901. llvm::Optional<Node> lhs;
  902. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  903. // Check for a prefix operator.
  904. if (auto operator_precedence = PrecedenceGroup::ForLeading(NextTokenKind());
  905. !operator_precedence) {
  906. lhs = ParsePostfixExpression();
  907. } else {
  908. if (PrecedenceGroup::GetPriority(ambient_precedence,
  909. *operator_precedence) !=
  910. OperatorPriority::RightFirst) {
  911. // The precedence rules don't permit this prefix operator in this
  912. // context. Diagnose this, but carry on and parse it anyway.
  913. emitter_.Emit(*position_, OperatorRequiresParentheses);
  914. } else {
  915. // Check that this operator follows the proper whitespace rules.
  916. DiagnoseOperatorFixity(OperatorFixity::Prefix);
  917. }
  918. auto operator_token = Consume(NextTokenKind());
  919. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  920. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  921. has_errors);
  922. lhs_precedence = *operator_precedence;
  923. }
  924. // Consume a sequence of infix and postfix operators.
  925. while (auto trailing_operator = PrecedenceGroup::ForTrailing(
  926. NextTokenKind(), IsTrailingOperatorInfix())) {
  927. auto [operator_precedence, is_binary] = *trailing_operator;
  928. // TODO: If this operator is ambiguous with either the ambient precedence
  929. // or the LHS precedence, and there's a variant with a different fixity
  930. // that would work, use that one instead for error recovery.
  931. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  932. OperatorPriority::RightFirst) {
  933. // The precedence rules don't permit this operator in this context. Try
  934. // again in the enclosing expression context.
  935. return lhs;
  936. }
  937. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  938. OperatorPriority::LeftFirst) {
  939. // Either the LHS operator and this operator are ambiguous, or the
  940. // LHS operator is a unary operator that can't be nested within
  941. // this operator. Either way, parentheses are required.
  942. emitter_.Emit(*position_, OperatorRequiresParentheses);
  943. lhs = llvm::None;
  944. } else {
  945. DiagnoseOperatorFixity(is_binary ? OperatorFixity::Infix
  946. : OperatorFixity::Postfix);
  947. }
  948. auto operator_token = Consume(NextTokenKind());
  949. if (is_binary) {
  950. auto rhs = ParseOperatorExpression(operator_precedence);
  951. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  952. /*has_error=*/!lhs || !rhs);
  953. } else {
  954. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  955. /*has_error=*/!lhs);
  956. }
  957. lhs_precedence = operator_precedence;
  958. }
  959. return lhs;
  960. }
  961. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  962. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  963. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  964. }
  965. auto ParseTree::Parser::ParseType() -> llvm::Optional<Node> {
  966. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  967. return ParseOperatorExpression(PrecedenceGroup::ForType());
  968. }
  969. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  970. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  971. TokenizedBuffer::Token start_token = *position_;
  972. auto start = GetSubtreeStartPosition();
  973. bool has_errors = !ParseExpression();
  974. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  975. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  976. has_errors);
  977. }
  978. if (!has_errors) {
  979. emitter_.Emit(*position_, ExpectedSemiAfterExpression);
  980. }
  981. if (auto semi_token = SkipPastLikelyEnd(start_token)) {
  982. return AddNode(ParseNodeKind::ExpressionStatement(), *semi_token, start,
  983. /*has_error=*/true);
  984. }
  985. // Found junk not even followed by a `;`.
  986. return llvm::None;
  987. }
  988. auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
  989. -> llvm::Optional<Node> {
  990. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  991. // `(` expression `)`
  992. auto start = GetSubtreeStartPosition();
  993. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  994. if (!open_paren) {
  995. CARBON_DIAGNOSTIC(ExpectedParenAfter, Error, "Expected `(` after `{0}`.",
  996. TokenKind);
  997. emitter_.Emit(*position_, ExpectedParenAfter, introducer);
  998. }
  999. auto expr = ParseExpression();
  1000. if (!open_paren) {
  1001. // Don't expect a matching closing paren if there wasn't an opening paren.
  1002. return llvm::None;
  1003. }
  1004. auto close_paren =
  1005. ParseCloseParen(*open_paren, ParseNodeKind::ConditionEnd());
  1006. return AddNode(ParseNodeKind::Condition(), *open_paren, start,
  1007. /*has_error=*/!expr || !close_paren);
  1008. }
  1009. auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
  1010. auto start = GetSubtreeStartPosition();
  1011. auto if_token = Consume(TokenKind::If());
  1012. auto cond = ParseParenCondition(TokenKind::If());
  1013. auto then_case = ParseCodeBlock();
  1014. bool else_has_errors = false;
  1015. if (ConsumeAndAddLeafNodeIf(TokenKind::Else(),
  1016. ParseNodeKind::IfStatementElse())) {
  1017. // 'else if' is permitted as a special case.
  1018. if (NextTokenIs(TokenKind::If())) {
  1019. else_has_errors = !ParseIfStatement();
  1020. } else {
  1021. else_has_errors = !ParseCodeBlock();
  1022. }
  1023. }
  1024. return AddNode(ParseNodeKind::IfStatement(), if_token, start,
  1025. /*has_error=*/!cond || !then_case || else_has_errors);
  1026. }
  1027. auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
  1028. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  1029. auto start = GetSubtreeStartPosition();
  1030. auto while_token = Consume(TokenKind::While());
  1031. auto cond = ParseParenCondition(TokenKind::While());
  1032. auto body = ParseCodeBlock();
  1033. return AddNode(ParseNodeKind::WhileStatement(), while_token, start,
  1034. /*has_error=*/!cond || !body);
  1035. }
  1036. auto ParseTree::Parser::ParseForStatement() -> llvm::Optional<Node> {
  1037. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  1038. auto for_start = GetSubtreeStartPosition();
  1039. auto for_token = Consume(TokenKind::For());
  1040. // Parse the loop header.
  1041. auto header = [this]() -> llvm::Optional<Node> {
  1042. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  1043. auto header_start = GetSubtreeStartPosition();
  1044. if (!open_paren) {
  1045. CARBON_DIAGNOSTIC(ExpectedParenAfter, Error,
  1046. "Expected `(` after `{0}`. Recovering from missing `(` "
  1047. "not implemented yet!",
  1048. TokenKind);
  1049. emitter_.Emit(*position_, ExpectedParenAfter, TokenKind::For());
  1050. // TODO: A proper recovery strategy is needed here. For now, I assume
  1051. // that all brackets are properly balanced (i.e. each open bracket has a
  1052. // closing one).
  1053. // This is temporary until we come to a conclusion regarding the
  1054. // recovery tokens strategy.
  1055. return llvm::None;
  1056. }
  1057. bool iter_var_parsed = false;
  1058. if (NextTokenIs(TokenKind::Var())) {
  1059. auto var_token = Consume(TokenKind::Var());
  1060. auto var_start = GetSubtreeStartPosition();
  1061. auto pattern = ParsePattern(PatternKind::Variable);
  1062. AddNode(ParseNodeKind::VariableDeclaration(), var_token, var_start,
  1063. !pattern);
  1064. iter_var_parsed = true;
  1065. } else {
  1066. CARBON_DIAGNOSTIC(ExpectedVariableDeclaration, Error,
  1067. "Expected `var` declaration.");
  1068. emitter_.Emit(*position_, ExpectedVariableDeclaration);
  1069. if (auto next_in = FindNextOf({TokenKind::In()}); next_in) {
  1070. SkipTo(*next_in);
  1071. }
  1072. }
  1073. // A separator is either an `in` or a `:`. Even though `:` is incorrect,
  1074. // accidentally typing it by a C++ programmer might be a common mistake
  1075. // that warrants special handling.
  1076. bool separator_parsed = false;
  1077. bool in_parsed = false;
  1078. if (NextTokenIs(TokenKind::In())) {
  1079. separator_parsed = true;
  1080. in_parsed = true;
  1081. AddLeafNode(ParseNodeKind::ForIn(), Consume(TokenKind::In()));
  1082. } else if (NextTokenIs(TokenKind::Colon())) {
  1083. separator_parsed = true;
  1084. CARBON_DIAGNOSTIC(ExpectedIn, Error, "`:` should be replaced by `in`.");
  1085. emitter_.Emit(*position_, ExpectedIn);
  1086. Consume(TokenKind::Colon());
  1087. } else {
  1088. CARBON_DIAGNOSTIC(ExpectedIn, Error,
  1089. "Expected `in` after loop `var` declaration.");
  1090. emitter_.Emit(*position_, ExpectedIn);
  1091. SkipTo(tokens_.GetMatchedClosingToken(*open_paren));
  1092. }
  1093. // Only try to parse the container expression if a separator was parsed.
  1094. // This reduces the emitted error messages if the separator was missing
  1095. // altogether.
  1096. auto container_expr = separator_parsed ? ParseExpression() : llvm::None;
  1097. auto close_paren =
  1098. ParseCloseParen(*open_paren, ParseNodeKind::ForHeaderEnd());
  1099. return AddNode(
  1100. ParseNodeKind::ForHeader(), *open_paren, header_start,
  1101. !iter_var_parsed || !in_parsed || !container_expr || !close_paren);
  1102. }();
  1103. auto body = ParseCodeBlock();
  1104. return AddNode(ParseNodeKind::ForStatement(), for_token, for_start,
  1105. !header || !body);
  1106. }
  1107. auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
  1108. KeywordStatementArgument argument)
  1109. -> llvm::Optional<Node> {
  1110. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  1111. auto keyword_kind = NextTokenKind();
  1112. CARBON_CHECK(keyword_kind.IsKeyword());
  1113. auto start = GetSubtreeStartPosition();
  1114. auto keyword = Consume(keyword_kind);
  1115. bool arg_error = false;
  1116. if ((argument == KeywordStatementArgument::Optional &&
  1117. NextTokenKind() != TokenKind::Semi()) ||
  1118. argument == KeywordStatementArgument::Mandatory) {
  1119. arg_error = !ParseExpression();
  1120. }
  1121. auto semi =
  1122. ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
  1123. if (!semi) {
  1124. CARBON_DIAGNOSTIC(ExpectedSemiAfter, Error, "Expected `;` after `{0}`.",
  1125. TokenKind);
  1126. emitter_.Emit(*position_, ExpectedSemiAfter, keyword_kind);
  1127. // TODO: Try to skip to a semicolon to recover.
  1128. }
  1129. return AddNode(kind, keyword, start, /*has_error=*/!semi || arg_error);
  1130. }
  1131. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  1132. CARBON_RETURN_IF_STACK_LIMITED(llvm::None);
  1133. switch (NextTokenKind()) {
  1134. case TokenKind::Var():
  1135. return ParseVariableDeclaration();
  1136. case TokenKind::If():
  1137. return ParseIfStatement();
  1138. case TokenKind::While():
  1139. return ParseWhileStatement();
  1140. case TokenKind::For():
  1141. return ParseForStatement();
  1142. case TokenKind::Continue():
  1143. return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
  1144. KeywordStatementArgument::None);
  1145. case TokenKind::Break():
  1146. return ParseKeywordStatement(ParseNodeKind::BreakStatement(),
  1147. KeywordStatementArgument::None);
  1148. case TokenKind::Return():
  1149. return ParseKeywordStatement(ParseNodeKind::ReturnStatement(),
  1150. KeywordStatementArgument::Optional);
  1151. default:
  1152. // A statement with no introducer token can only be an expression
  1153. // statement.
  1154. return ParseExpressionStatement();
  1155. }
  1156. }
  1157. } // namespace Carbon