parser_impl.cpp 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "common/check.h"
  7. #include "llvm/ADT/Optional.h"
  8. #include "llvm/Support/FormatVariadic.h"
  9. #include "llvm/Support/raw_ostream.h"
  10. #include "toolchain/lexer/token_kind.h"
  11. #include "toolchain/lexer/tokenized_buffer.h"
  12. #include "toolchain/parser/parse_node_kind.h"
  13. #include "toolchain/parser/parse_tree.h"
  14. namespace Carbon {
  15. struct StackLimitExceeded : DiagnosticBase<StackLimitExceeded> {
  16. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  17. static auto Format() -> std::string {
  18. return llvm::formatv("Exceeded recursion limit ({0})",
  19. ParseTree::StackDepthLimit);
  20. }
  21. };
  22. // Manages the parser's stack depth, particularly decrementing on destruction.
  23. // This should only be instantiated through RETURN_IF_STACK_LIMITED.
  24. class ParseTree::Parser::ScopedStackStep {
  25. public:
  26. explicit ScopedStackStep(ParseTree::Parser* parser) : parser_(parser) {
  27. ++parser_->stack_depth_;
  28. }
  29. ~ScopedStackStep() { --parser_->stack_depth_; }
  30. auto VerifyUnderLimit() -> bool {
  31. if (parser_->stack_depth_ >= StackDepthLimit) {
  32. parser_->emitter_.EmitError<StackLimitExceeded>(*parser_->position_);
  33. return false;
  34. }
  35. return true;
  36. }
  37. private:
  38. ParseTree::Parser* parser_;
  39. };
  40. // Encapsulates checking the stack and erroring if needed. This should be called
  41. // at the start of every parse function.
  42. #define RETURN_IF_STACK_LIMITED(error_return_expr) \
  43. ScopedStackStep scoped_stack_step(this); \
  44. if (!scoped_stack_step.VerifyUnderLimit()) { \
  45. return (error_return_expr); \
  46. }
  47. struct UnexpectedTokenInCodeBlock : DiagnosticBase<UnexpectedTokenInCodeBlock> {
  48. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  49. static constexpr llvm::StringLiteral Message =
  50. "Unexpected token in code block.";
  51. };
  52. struct ExpectedFunctionName : DiagnosticBase<ExpectedFunctionName> {
  53. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  54. static constexpr llvm::StringLiteral Message =
  55. "Expected function name after `fn` keyword.";
  56. };
  57. struct ExpectedFunctionParams : DiagnosticBase<ExpectedFunctionParams> {
  58. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  59. static constexpr llvm::StringLiteral Message =
  60. "Expected `(` after function name.";
  61. };
  62. struct ExpectedFunctionBodyOrSemi : DiagnosticBase<ExpectedFunctionBodyOrSemi> {
  63. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  64. static constexpr llvm::StringLiteral Message =
  65. "Expected function definition or `;` after function declaration.";
  66. };
  67. struct ExpectedVariableName : DiagnosticBase<ExpectedVariableName> {
  68. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  69. static constexpr llvm::StringLiteral Message =
  70. "Expected pattern in `var` declaration.";
  71. };
  72. struct ExpectedParameterName : DiagnosticBase<ExpectedParameterName> {
  73. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  74. static constexpr llvm::StringLiteral Message =
  75. "Expected parameter declaration.";
  76. };
  77. struct ExpectedStructLiteralField : DiagnosticBase<ExpectedStructLiteralField> {
  78. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  79. auto Format() -> std::string {
  80. std::string result = "Expected ";
  81. if (can_be_type) {
  82. result += "`.field: type`";
  83. }
  84. if (can_be_type && can_be_value) {
  85. result += " or ";
  86. }
  87. if (can_be_value) {
  88. result += "`.field = value`";
  89. }
  90. result += ".";
  91. return result;
  92. }
  93. bool can_be_type;
  94. bool can_be_value;
  95. };
  96. struct UnrecognizedDeclaration : DiagnosticBase<UnrecognizedDeclaration> {
  97. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  98. static constexpr llvm::StringLiteral Message =
  99. "Unrecognized declaration introducer.";
  100. };
  101. struct ExpectedCodeBlock : DiagnosticBase<ExpectedCodeBlock> {
  102. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  103. static constexpr llvm::StringLiteral Message = "Expected braced code block.";
  104. };
  105. struct ExpectedExpression : DiagnosticBase<ExpectedExpression> {
  106. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  107. static constexpr llvm::StringLiteral Message = "Expected expression.";
  108. };
  109. struct ExpectedParenAfter : DiagnosticBase<ExpectedParenAfter> {
  110. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  111. static constexpr const char* Message = "Expected `(` after `{0}`.";
  112. auto Format() -> std::string {
  113. return llvm::formatv(Message, introducer.GetFixedSpelling()).str();
  114. }
  115. TokenKind introducer;
  116. };
  117. struct ExpectedCloseParen : DiagnosticBase<ExpectedCloseParen> {
  118. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  119. static constexpr llvm::StringLiteral Message =
  120. "Unexpected tokens before `)`.";
  121. // TODO: Include the location of the matching open paren in the diagnostic.
  122. TokenizedBuffer::Token open_paren;
  123. };
  124. struct ExpectedSemiAfterExpression
  125. : DiagnosticBase<ExpectedSemiAfterExpression> {
  126. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  127. static constexpr llvm::StringLiteral Message =
  128. "Expected `;` after expression.";
  129. };
  130. struct ExpectedSemiAfter : DiagnosticBase<ExpectedSemiAfter> {
  131. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  132. static constexpr const char* Message = "Expected `;` after `{0}`.";
  133. auto Format() -> std::string {
  134. return llvm::formatv(Message, preceding.GetFixedSpelling()).str();
  135. }
  136. TokenKind preceding;
  137. };
  138. struct ExpectedIdentifierAfterDot : DiagnosticBase<ExpectedIdentifierAfterDot> {
  139. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  140. static constexpr llvm::StringLiteral Message =
  141. "Expected identifier after `.`.";
  142. };
  143. struct UnexpectedTokenAfterListElement
  144. : DiagnosticBase<UnexpectedTokenAfterListElement> {
  145. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  146. static constexpr const char* Message = "Expected `,` or `{0}`.";
  147. auto Format() -> std::string {
  148. return llvm::formatv(Message, close.GetFixedSpelling()).str();
  149. }
  150. TokenKind close;
  151. };
  152. struct BinaryOperatorRequiresWhitespace
  153. : DiagnosticBase<BinaryOperatorRequiresWhitespace> {
  154. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  155. static constexpr const char* Message =
  156. "Whitespace missing {0} binary operator.";
  157. auto Format() -> std::string {
  158. const char* position = "around";
  159. if (has_leading_space) {
  160. position = "after";
  161. } else if (has_trailing_space) {
  162. position = "before";
  163. }
  164. return llvm::formatv(Message, position);
  165. }
  166. bool has_leading_space;
  167. bool has_trailing_space;
  168. };
  169. struct UnaryOperatorHasWhitespace : DiagnosticBase<UnaryOperatorHasWhitespace> {
  170. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  171. static constexpr const char* Message =
  172. "Whitespace is not allowed {0} this unary operator.";
  173. auto Format() -> std::string {
  174. return llvm::formatv(Message, prefix ? "after" : "before");
  175. }
  176. bool prefix;
  177. };
  178. struct UnaryOperatorRequiresWhitespace
  179. : DiagnosticBase<UnaryOperatorRequiresWhitespace> {
  180. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  181. static constexpr const char* Message =
  182. "Whitespace is required {0} this unary operator.";
  183. auto Format() -> std::string {
  184. return llvm::formatv(Message, prefix ? "before" : "after");
  185. }
  186. bool prefix;
  187. };
  188. struct OperatorRequiresParentheses
  189. : DiagnosticBase<OperatorRequiresParentheses> {
  190. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  191. static constexpr llvm::StringLiteral Message =
  192. "Parentheses are required to disambiguate operator precedence.";
  193. };
  194. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  195. TokenDiagnosticEmitter& emitter)
  196. : tree_(tree_arg),
  197. tokens_(tokens_arg),
  198. emitter_(emitter),
  199. position_(tokens_.tokens().begin()),
  200. end_(tokens_.tokens().end()) {
  201. CHECK(std::find_if(position_, end_,
  202. [&](TokenizedBuffer::Token t) {
  203. return tokens_.GetKind(t) == TokenKind::EndOfFile();
  204. }) != end_)
  205. << "No EndOfFileToken in token buffer.";
  206. }
  207. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  208. TokenDiagnosticEmitter& emitter) -> ParseTree {
  209. ParseTree tree(tokens);
  210. // We expect to have a 1:1 correspondence between tokens and tree nodes, so
  211. // reserve the space we expect to need here to avoid allocation and copying
  212. // overhead.
  213. tree.node_impls_.reserve(tokens.size());
  214. Parser parser(tree, tokens, emitter);
  215. while (!parser.AtEndOfFile()) {
  216. if (!parser.ParseDeclaration()) {
  217. // We don't have an enclosing parse tree node to mark as erroneous, so
  218. // just mark the tree as a whole.
  219. tree.has_errors_ = true;
  220. }
  221. }
  222. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position_);
  223. CHECK(tree.Verify()) << "Parse tree built but does not verify!";
  224. return tree;
  225. }
  226. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  227. CHECK(kind != TokenKind::EndOfFile()) << "Cannot consume the EOF token!";
  228. CHECK(NextTokenIs(kind)) << "The current token is the wrong kind!";
  229. TokenizedBuffer::Token t = *position_;
  230. ++position_;
  231. CHECK(position_ != end_)
  232. << "Reached end of tokens without finding EOF token.";
  233. return t;
  234. }
  235. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  236. -> llvm::Optional<TokenizedBuffer::Token> {
  237. if (!NextTokenIs(kind)) {
  238. return {};
  239. }
  240. return Consume(kind);
  241. }
  242. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  243. TokenizedBuffer::Token token) -> Node {
  244. Node n(tree_.node_impls_.size());
  245. tree_.node_impls_.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  246. return n;
  247. }
  248. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  249. ParseNodeKind n_kind)
  250. -> llvm::Optional<Node> {
  251. auto t = ConsumeIf(t_kind);
  252. if (!t) {
  253. return {};
  254. }
  255. return AddLeafNode(n_kind, *t);
  256. }
  257. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  258. tree_.node_impls_[n.index_].has_error = true;
  259. tree_.has_errors_ = true;
  260. }
  261. // A marker for the start of a node's subtree.
  262. //
  263. // This is used to track the size of the node's subtree. It can be used
  264. // repeatedly if multiple subtrees start at the same position.
  265. struct ParseTree::Parser::SubtreeStart {
  266. int tree_size;
  267. };
  268. auto ParseTree::Parser::GetSubtreeStartPosition() -> SubtreeStart {
  269. return {static_cast<int>(tree_.node_impls_.size())};
  270. }
  271. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  272. SubtreeStart start, bool has_error) -> Node {
  273. // The size of the subtree is the change in size from when we started this
  274. // subtree to now, but including the node we're about to add.
  275. int tree_stop_size = static_cast<int>(tree_.node_impls_.size()) + 1;
  276. int subtree_size = tree_stop_size - start.tree_size;
  277. Node n(tree_.node_impls_.size());
  278. tree_.node_impls_.push_back(NodeImpl(n_kind, t, subtree_size));
  279. if (has_error) {
  280. MarkNodeError(n);
  281. }
  282. return n;
  283. }
  284. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  285. TokenizedBuffer::Token t = *position_;
  286. TokenKind t_kind = tokens_.GetKind(t);
  287. if (!t_kind.IsOpeningSymbol()) {
  288. return false;
  289. }
  290. SkipTo(tokens_.GetMatchedClosingToken(t));
  291. Consume(t_kind.GetClosingSymbol());
  292. return true;
  293. }
  294. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  295. CHECK(t >= *position_) << "Tried to skip backwards.";
  296. position_ = TokenizedBuffer::TokenIterator(t);
  297. CHECK(position_ != end_) << "Skipped past EOF.";
  298. }
  299. auto ParseTree::Parser::FindNextOf(
  300. std::initializer_list<TokenKind> desired_kinds)
  301. -> llvm::Optional<TokenizedBuffer::Token> {
  302. auto new_position = position_;
  303. while (true) {
  304. TokenizedBuffer::Token token = *new_position;
  305. TokenKind kind = tokens_.GetKind(token);
  306. if (kind.IsOneOf(desired_kinds)) {
  307. return token;
  308. }
  309. // Step to the next token at the current bracketing level.
  310. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  311. // There are no more tokens at this level.
  312. return llvm::None;
  313. } else if (kind.IsOpeningSymbol()) {
  314. new_position =
  315. TokenizedBuffer::TokenIterator(tokens_.GetMatchedClosingToken(token));
  316. // Advance past the closing token.
  317. ++new_position;
  318. } else {
  319. ++new_position;
  320. }
  321. }
  322. }
  323. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root,
  324. SemiHandler on_semi)
  325. -> llvm::Optional<Node> {
  326. if (AtEndOfFile()) {
  327. return llvm::None;
  328. }
  329. TokenizedBuffer::Line root_line = tokens_.GetLine(skip_root);
  330. int root_line_indent = tokens_.GetIndentColumnNumber(root_line);
  331. // We will keep scanning through tokens on the same line as the root or
  332. // lines with greater indentation than root's line.
  333. auto is_same_line_or_indent_greater_than_root =
  334. [&](TokenizedBuffer::Token t) {
  335. TokenizedBuffer::Line l = tokens_.GetLine(t);
  336. if (l == root_line) {
  337. return true;
  338. }
  339. return tokens_.GetIndentColumnNumber(l) > root_line_indent;
  340. };
  341. do {
  342. if (NextTokenKind() == TokenKind::CloseCurlyBrace()) {
  343. // Immediately bail out if we hit an unmatched close curly, this will
  344. // pop us up a level of the syntax grouping.
  345. return llvm::None;
  346. }
  347. // We assume that a semicolon is always intended to be the end of the
  348. // current construct.
  349. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  350. return on_semi(*semi);
  351. }
  352. // Skip over any matching group of tokens_.
  353. if (SkipMatchingGroup()) {
  354. continue;
  355. }
  356. // Otherwise just step forward one token.
  357. Consume(NextTokenKind());
  358. } while (!AtEndOfFile() &&
  359. is_same_line_or_indent_greater_than_root(*position_));
  360. return llvm::None;
  361. }
  362. auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
  363. ParseNodeKind kind)
  364. -> llvm::Optional<Node> {
  365. if (auto close_paren =
  366. ConsumeAndAddLeafNodeIf(TokenKind::CloseParen(), kind)) {
  367. return close_paren;
  368. }
  369. emitter_.EmitError<ExpectedCloseParen>(*position_,
  370. {.open_paren = open_paren});
  371. SkipTo(tokens_.GetMatchedClosingToken(open_paren));
  372. AddLeafNode(kind, Consume(TokenKind::CloseParen()));
  373. return llvm::None;
  374. }
  375. template <typename ListElementParser, typename ListCompletionHandler>
  376. auto ParseTree::Parser::ParseList(TokenKind open, TokenKind close,
  377. ListElementParser list_element_parser,
  378. ParseNodeKind comma_kind,
  379. ListCompletionHandler list_handler,
  380. bool allow_trailing_comma)
  381. -> llvm::Optional<Node> {
  382. // `(` element-list[opt] `)`
  383. //
  384. // element-list ::= element
  385. // ::= element `,` element-list
  386. TokenizedBuffer::Token open_paren = Consume(open);
  387. bool has_errors = false;
  388. bool any_commas = false;
  389. int64_t num_elements = 0;
  390. // Parse elements, if any are specified.
  391. if (!NextTokenIs(close)) {
  392. while (true) {
  393. bool element_error = !list_element_parser();
  394. has_errors |= element_error;
  395. ++num_elements;
  396. if (!NextTokenIsOneOf({close, TokenKind::Comma()})) {
  397. if (!element_error) {
  398. emitter_.EmitError<UnexpectedTokenAfterListElement>(*position_,
  399. {.close = close});
  400. }
  401. has_errors = true;
  402. auto end_of_element = FindNextOf({TokenKind::Comma(), close});
  403. // The lexer guarantees that parentheses are balanced.
  404. CHECK(end_of_element) << "missing matching `)` for `(`";
  405. SkipTo(*end_of_element);
  406. }
  407. if (NextTokenIs(close)) {
  408. break;
  409. }
  410. AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
  411. any_commas = true;
  412. if (allow_trailing_comma && NextTokenIs(close)) {
  413. break;
  414. }
  415. }
  416. }
  417. bool is_single_item = num_elements == 1 && !any_commas;
  418. return list_handler(open_paren, is_single_item, Consume(close), has_errors);
  419. }
  420. auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
  421. RETURN_IF_STACK_LIMITED(llvm::None);
  422. if (NextTokenIs(TokenKind::Identifier()) &&
  423. tokens_.GetKind(*(position_ + 1)) == TokenKind::Colon()) {
  424. // identifier `:` type
  425. auto start = GetSubtreeStartPosition();
  426. AddLeafNode(ParseNodeKind::DeclaredName(),
  427. Consume(TokenKind::Identifier()));
  428. auto colon = Consume(TokenKind::Colon());
  429. auto type = ParseType();
  430. return AddNode(ParseNodeKind::PatternBinding(), colon, start,
  431. /*has_error=*/!type);
  432. }
  433. switch (kind) {
  434. case PatternKind::Parameter:
  435. emitter_.EmitError<ExpectedParameterName>(*position_);
  436. break;
  437. case PatternKind::Variable:
  438. emitter_.EmitError<ExpectedVariableName>(*position_);
  439. break;
  440. }
  441. return llvm::None;
  442. }
  443. auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
  444. RETURN_IF_STACK_LIMITED(llvm::None);
  445. return ParsePattern(PatternKind::Parameter);
  446. }
  447. auto ParseTree::Parser::ParseFunctionSignature() -> bool {
  448. RETURN_IF_STACK_LIMITED(false);
  449. auto start = GetSubtreeStartPosition();
  450. auto params = ParseParenList(
  451. [&] { return ParseFunctionParameter(); },
  452. ParseNodeKind::ParameterListComma(),
  453. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  454. TokenizedBuffer::Token close_paren, bool has_errors) {
  455. AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren);
  456. return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
  457. has_errors);
  458. });
  459. auto start_return_type = GetSubtreeStartPosition();
  460. if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) {
  461. auto return_type = ParseType();
  462. AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type,
  463. /*has_error=*/!return_type);
  464. if (!return_type) {
  465. return false;
  466. }
  467. }
  468. return params.hasValue();
  469. }
  470. auto ParseTree::Parser::ParseCodeBlock() -> llvm::Optional<Node> {
  471. RETURN_IF_STACK_LIMITED(llvm::None);
  472. llvm::Optional<TokenizedBuffer::Token> maybe_open_curly =
  473. ConsumeIf(TokenKind::OpenCurlyBrace());
  474. if (!maybe_open_curly) {
  475. // Recover by parsing a single statement.
  476. emitter_.EmitError<ExpectedCodeBlock>(*position_);
  477. return ParseStatement();
  478. }
  479. TokenizedBuffer::Token open_curly = *maybe_open_curly;
  480. auto start = GetSubtreeStartPosition();
  481. bool has_errors = false;
  482. // Loop over all the different possibly nested elements in the code block.
  483. while (!NextTokenIs(TokenKind::CloseCurlyBrace())) {
  484. if (!ParseStatement()) {
  485. // We detected and diagnosed an error of some kind. We can trivially skip
  486. // to the actual close curly brace from here.
  487. // FIXME: It would be better to skip to the next semicolon, or the next
  488. // token at the start of a line with the same indent as this one.
  489. SkipTo(tokens_.GetMatchedClosingToken(open_curly));
  490. has_errors = true;
  491. break;
  492. }
  493. }
  494. // We always reach here having set our position in the token stream to the
  495. // close curly brace.
  496. AddLeafNode(ParseNodeKind::CodeBlockEnd(),
  497. Consume(TokenKind::CloseCurlyBrace()));
  498. return AddNode(ParseNodeKind::CodeBlock(), open_curly, start, has_errors);
  499. }
  500. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  501. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::Fn());
  502. auto start = GetSubtreeStartPosition();
  503. auto add_error_function_node = [&] {
  504. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  505. start, /*has_error=*/true);
  506. };
  507. RETURN_IF_STACK_LIMITED(add_error_function_node());
  508. auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
  509. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  510. };
  511. auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  512. ParseNodeKind::DeclaredName());
  513. if (!name_n) {
  514. emitter_.EmitError<ExpectedFunctionName>(*position_);
  515. // FIXME: We could change the lexer to allow us to synthesize certain
  516. // kinds of tokens and try to "recover" here, but unclear that this is
  517. // really useful.
  518. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  519. return add_error_function_node();
  520. }
  521. TokenizedBuffer::Token open_paren = *position_;
  522. if (tokens_.GetKind(open_paren) != TokenKind::OpenParen()) {
  523. emitter_.EmitError<ExpectedFunctionParams>(open_paren);
  524. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  525. return add_error_function_node();
  526. }
  527. TokenizedBuffer::Token close_paren =
  528. tokens_.GetMatchedClosingToken(open_paren);
  529. if (!ParseFunctionSignature()) {
  530. // Don't try to parse more of the function declaration, but consume a
  531. // declaration ending semicolon if found (without going to a new line).
  532. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  533. return add_error_function_node();
  534. }
  535. // See if we should parse a definition which is represented as a code block.
  536. if (NextTokenIs(TokenKind::OpenCurlyBrace())) {
  537. if (!ParseCodeBlock()) {
  538. return add_error_function_node();
  539. }
  540. } else if (!ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  541. ParseNodeKind::DeclarationEnd())) {
  542. emitter_.EmitError<ExpectedFunctionBodyOrSemi>(*position_);
  543. if (tokens_.GetLine(*position_) == tokens_.GetLine(close_paren)) {
  544. // Only need to skip if we've not already found a new line.
  545. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  546. }
  547. return add_error_function_node();
  548. }
  549. // Successfully parsed the function, add that node.
  550. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  551. start);
  552. }
  553. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  554. // `var` pattern [= expression] `;`
  555. TokenizedBuffer::Token var_token = Consume(TokenKind::Var());
  556. auto start = GetSubtreeStartPosition();
  557. RETURN_IF_STACK_LIMITED(AddNode(ParseNodeKind::VariableDeclaration(),
  558. var_token, start,
  559. /*has_error=*/true));
  560. auto pattern = ParsePattern(PatternKind::Variable);
  561. if (!pattern) {
  562. if (auto after_pattern =
  563. FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  564. SkipTo(*after_pattern);
  565. }
  566. }
  567. auto start_init = GetSubtreeStartPosition();
  568. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  569. auto init = ParseExpression();
  570. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  571. /*has_error=*/!init);
  572. }
  573. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  574. ParseNodeKind::DeclarationEnd());
  575. if (!semi) {
  576. emitter_.EmitError<ExpectedSemiAfterExpression>(*position_);
  577. SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
  578. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  579. });
  580. }
  581. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  582. /*has_error=*/!pattern || !semi);
  583. }
  584. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  585. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  586. Consume(TokenKind::Semi()));
  587. }
  588. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  589. RETURN_IF_STACK_LIMITED(llvm::None);
  590. switch (NextTokenKind()) {
  591. case TokenKind::Fn():
  592. return ParseFunctionDeclaration();
  593. case TokenKind::Var():
  594. return ParseVariableDeclaration();
  595. case TokenKind::Semi():
  596. return ParseEmptyDeclaration();
  597. case TokenKind::EndOfFile():
  598. return llvm::None;
  599. default:
  600. // Errors are handled outside the switch.
  601. break;
  602. }
  603. // We didn't recognize an introducer for a valid declaration.
  604. emitter_.EmitError<UnrecognizedDeclaration>(*position_);
  605. // Skip forward past any end of a declaration we simply didn't understand so
  606. // that we can find the start of the next declaration or the end of a scope.
  607. if (auto found_semi_n =
  608. SkipPastLikelyEnd(*position_, [&](TokenizedBuffer::Token semi) {
  609. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), semi);
  610. })) {
  611. MarkNodeError(*found_semi_n);
  612. return *found_semi_n;
  613. }
  614. // Nothing, not even a semicolon found.
  615. return llvm::None;
  616. }
  617. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  618. RETURN_IF_STACK_LIMITED(llvm::None);
  619. // parenthesized-expression ::= `(` expression `)`
  620. // tuple-literal ::= `(` `)`
  621. // ::= `(` expression `,` [expression-list [`,`]] `)`
  622. //
  623. // Parse the union of these, `(` [expression-list [`,`]] `)`, and work out
  624. // whether it's a tuple or a parenthesized expression afterwards.
  625. auto start = GetSubtreeStartPosition();
  626. return ParseParenList(
  627. [&] { return ParseExpression(); }, ParseNodeKind::TupleLiteralComma(),
  628. [&](TokenizedBuffer::Token open_paren, bool is_single_item,
  629. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  630. AddLeafNode(is_single_item ? ParseNodeKind::ParenExpressionEnd()
  631. : ParseNodeKind::TupleLiteralEnd(),
  632. close_paren);
  633. return AddNode(is_single_item ? ParseNodeKind::ParenExpression()
  634. : ParseNodeKind::TupleLiteral(),
  635. open_paren, start, has_arg_errors);
  636. },
  637. /*allow_trailing_comma=*/true);
  638. }
  639. auto ParseTree::Parser::ParseBraceExpression() -> llvm::Optional<Node> {
  640. RETURN_IF_STACK_LIMITED(llvm::None);
  641. // braced-expression ::= `{` [field-value-list] `}`
  642. // ::= `{` field-type-list `}`
  643. // field-value-list ::= field-value [`,`]
  644. // ::= field-value `,` field-value-list
  645. // field-value ::= `.` identifier `=` expression
  646. // field-type-list ::= field-type [`,`]
  647. // ::= field-type `,` field-type-list
  648. // field-type ::= `.` identifier `:` type
  649. //
  650. // Note that `{` `}` is the first form (an empty struct), but that an empty
  651. // struct value also behaves as an empty struct type.
  652. auto start = GetSubtreeStartPosition();
  653. enum Kind { Unknown, Value, Type };
  654. Kind kind = Unknown;
  655. return ParseList(
  656. TokenKind::OpenCurlyBrace(), TokenKind::CloseCurlyBrace(),
  657. [&]() -> llvm::Optional<Node> {
  658. auto start_elem = GetSubtreeStartPosition();
  659. auto diagnose_invalid_syntax = [&] {
  660. emitter_.EmitError<ExpectedStructLiteralField>(
  661. *position_,
  662. {.can_be_type = kind != Value, .can_be_value = kind != Type});
  663. return llvm::None;
  664. };
  665. if (!NextTokenIs(TokenKind::Period())) {
  666. return diagnose_invalid_syntax();
  667. }
  668. auto designator = ParseDesignatorExpression(
  669. start_elem, ParseNodeKind::StructFieldDesignator(),
  670. /*has_errors=*/false);
  671. if (!designator) {
  672. auto recovery_pos = FindNextOf(
  673. {TokenKind::Equal(), TokenKind::Colon(), TokenKind::Comma()});
  674. if (!recovery_pos ||
  675. tokens_.GetKind(*recovery_pos) == TokenKind::Comma()) {
  676. return llvm::None;
  677. }
  678. SkipTo(*recovery_pos);
  679. }
  680. // Work out the kind of this element
  681. Kind elem_kind = (NextTokenIs(TokenKind::Equal()) ? Value
  682. : NextTokenIs(TokenKind::Colon()) ? Type
  683. : Unknown);
  684. if (elem_kind == Unknown || (kind != Unknown && elem_kind != kind)) {
  685. return diagnose_invalid_syntax();
  686. }
  687. kind = elem_kind;
  688. // Struct type fields and value fields use the same grammar except that
  689. // one has a `:` separator and the other has an `=` separator.
  690. auto equal_or_colon_token =
  691. Consume(kind == Type ? TokenKind::Colon() : TokenKind::Equal());
  692. auto type_or_value = ParseExpression();
  693. return AddNode(kind == Type ? ParseNodeKind::StructFieldType()
  694. : ParseNodeKind::StructFieldValue(),
  695. equal_or_colon_token, start_elem,
  696. /*has_error=*/!designator || !type_or_value);
  697. },
  698. ParseNodeKind::StructComma(),
  699. [&](TokenizedBuffer::Token open_brace, bool /*is_single_item*/,
  700. TokenizedBuffer::Token close_brace, bool has_errors) {
  701. AddLeafNode(ParseNodeKind::StructEnd(), close_brace);
  702. return AddNode(kind == Type ? ParseNodeKind::StructTypeLiteral()
  703. : ParseNodeKind::StructLiteral(),
  704. open_brace, start, has_errors);
  705. },
  706. /*allow_trailing_comma=*/true);
  707. }
  708. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  709. RETURN_IF_STACK_LIMITED(llvm::None);
  710. llvm::Optional<ParseNodeKind> kind;
  711. switch (NextTokenKind()) {
  712. case TokenKind::Identifier():
  713. kind = ParseNodeKind::NameReference();
  714. break;
  715. case TokenKind::IntegerLiteral():
  716. case TokenKind::RealLiteral():
  717. case TokenKind::StringLiteral():
  718. case TokenKind::IntegerTypeLiteral():
  719. case TokenKind::UnsignedIntegerTypeLiteral():
  720. case TokenKind::FloatingPointTypeLiteral():
  721. kind = ParseNodeKind::Literal();
  722. break;
  723. case TokenKind::OpenParen():
  724. return ParseParenExpression();
  725. case TokenKind::OpenCurlyBrace():
  726. return ParseBraceExpression();
  727. default:
  728. emitter_.EmitError<ExpectedExpression>(*position_);
  729. return llvm::None;
  730. }
  731. return AddLeafNode(*kind, Consume(NextTokenKind()));
  732. }
  733. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  734. ParseNodeKind kind,
  735. bool has_errors)
  736. -> llvm::Optional<Node> {
  737. // `.` identifier
  738. auto dot = Consume(TokenKind::Period());
  739. auto name = ConsumeIf(TokenKind::Identifier());
  740. if (name) {
  741. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  742. } else {
  743. emitter_.EmitError<ExpectedIdentifierAfterDot>(*position_);
  744. // If we see a keyword, assume it was intended to be the designated name.
  745. // TODO: Should keywords be valid in designators?
  746. if (NextTokenKind().IsKeyword()) {
  747. name = Consume(NextTokenKind());
  748. auto name_node = AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  749. MarkNodeError(name_node);
  750. } else {
  751. has_errors = true;
  752. }
  753. }
  754. Node result = AddNode(kind, dot, start, has_errors);
  755. return name ? result : llvm::Optional<Node>();
  756. }
  757. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  758. -> llvm::Optional<Node> {
  759. RETURN_IF_STACK_LIMITED(llvm::None);
  760. // `(` expression-list[opt] `)`
  761. //
  762. // expression-list ::= expression
  763. // ::= expression `,` expression-list
  764. return ParseParenList(
  765. [&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(),
  766. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  767. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  768. AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren);
  769. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  770. has_errors || has_arg_errors);
  771. });
  772. }
  773. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  774. RETURN_IF_STACK_LIMITED(llvm::None);
  775. auto start = GetSubtreeStartPosition();
  776. llvm::Optional<Node> expression = ParsePrimaryExpression();
  777. TokenizedBuffer::TokenIterator last_position = position_;
  778. while (true) {
  779. switch (NextTokenKind()) {
  780. case TokenKind::Period():
  781. expression = ParseDesignatorExpression(
  782. start, ParseNodeKind::DesignatorExpression(), !expression);
  783. break;
  784. case TokenKind::OpenParen():
  785. expression = ParseCallExpression(start, !expression);
  786. break;
  787. default:
  788. return expression;
  789. }
  790. // This is subject to an infinite loop if a child call fails, so monitor for
  791. // stalling.
  792. if (last_position == position_) {
  793. CHECK(expression == llvm::None);
  794. return expression;
  795. }
  796. last_position = position_;
  797. }
  798. }
  799. // Determines whether the given token is considered to be the start of an
  800. // operand according to the rules for infix operator parsing.
  801. static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
  802. return kind.IsOneOf({TokenKind::OpenParen(), TokenKind::Identifier(),
  803. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  804. TokenKind::StringLiteral()});
  805. }
  806. // Determines whether the given token is considered to be the end of an operand
  807. // according to the rules for infix operator parsing.
  808. static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
  809. return kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  810. TokenKind::CloseSquareBracket(), TokenKind::Identifier(),
  811. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  812. TokenKind::StringLiteral()});
  813. }
  814. // Determines whether the given token could possibly be the start of an operand.
  815. // This is conservatively correct, and will never incorrectly return `false`,
  816. // but can incorrectly return `true`.
  817. static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
  818. return !kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  819. TokenKind::CloseSquareBracket(), TokenKind::Comma(),
  820. TokenKind::Semi(), TokenKind::Colon()});
  821. }
  822. auto ParseTree::Parser::IsLexicallyValidInfixOperator() -> bool {
  823. CHECK(!AtEndOfFile()) << "Expected an operator token.";
  824. bool leading_space = tokens_.HasLeadingWhitespace(*position_);
  825. bool trailing_space = tokens_.HasTrailingWhitespace(*position_);
  826. // If there's whitespace on both sides, it's an infix operator.
  827. if (leading_space && trailing_space) {
  828. return true;
  829. }
  830. // If there's whitespace on exactly one side, it's not an infix operator.
  831. if (leading_space || trailing_space) {
  832. return false;
  833. }
  834. // Otherwise, for an infix operator, the preceding token must be any close
  835. // bracket, identifier, or literal and the next token must be an open paren,
  836. // identifier, or literal.
  837. if (position_ == tokens_.tokens().begin() ||
  838. !IsAssumedEndOfOperand(tokens_.GetKind(*(position_ - 1))) ||
  839. !IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  840. return false;
  841. }
  842. return true;
  843. }
  844. auto ParseTree::Parser::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  845. bool is_valid_as_infix = IsLexicallyValidInfixOperator();
  846. if (fixity == OperatorFixity::Infix) {
  847. // Infix operators must satisfy the infix operator rules.
  848. if (!is_valid_as_infix) {
  849. emitter_.EmitError<BinaryOperatorRequiresWhitespace>(
  850. *position_,
  851. {.has_leading_space = tokens_.HasLeadingWhitespace(*position_),
  852. .has_trailing_space = tokens_.HasTrailingWhitespace(*position_)});
  853. }
  854. } else {
  855. bool prefix = fixity == OperatorFixity::Prefix;
  856. // Whitespace is not permitted between a symbolic pre/postfix operator and
  857. // its operand.
  858. if (NextTokenKind().IsSymbol() &&
  859. (prefix ? tokens_.HasTrailingWhitespace(*position_)
  860. : tokens_.HasLeadingWhitespace(*position_))) {
  861. emitter_.EmitError<UnaryOperatorHasWhitespace>(*position_,
  862. {.prefix = prefix});
  863. }
  864. // Pre/postfix operators must not satisfy the infix operator rules.
  865. if (is_valid_as_infix) {
  866. emitter_.EmitError<UnaryOperatorRequiresWhitespace>(*position_,
  867. {.prefix = prefix});
  868. }
  869. }
  870. }
  871. auto ParseTree::Parser::IsTrailingOperatorInfix() -> bool {
  872. if (AtEndOfFile()) {
  873. return false;
  874. }
  875. // An operator that follows the infix operator rules is parsed as
  876. // infix, unless the next token means that it can't possibly be.
  877. if (IsLexicallyValidInfixOperator() &&
  878. IsPossibleStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  879. return true;
  880. }
  881. // A trailing operator with leading whitespace that's not valid as infix is
  882. // not valid at all. If the next token looks like the start of an operand,
  883. // then parse as infix, otherwise as postfix. Either way we'll produce a
  884. // diagnostic later on.
  885. if (tokens_.HasLeadingWhitespace(*position_) &&
  886. IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  887. return true;
  888. }
  889. return false;
  890. }
  891. auto ParseTree::Parser::ParseOperatorExpression(
  892. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  893. RETURN_IF_STACK_LIMITED(llvm::None);
  894. auto start = GetSubtreeStartPosition();
  895. llvm::Optional<Node> lhs;
  896. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  897. // Check for a prefix operator.
  898. if (auto operator_precedence = PrecedenceGroup::ForLeading(NextTokenKind());
  899. !operator_precedence) {
  900. lhs = ParsePostfixExpression();
  901. } else {
  902. if (PrecedenceGroup::GetPriority(ambient_precedence,
  903. *operator_precedence) !=
  904. OperatorPriority::RightFirst) {
  905. // The precedence rules don't permit this prefix operator in this
  906. // context. Diagnose this, but carry on and parse it anyway.
  907. emitter_.EmitError<OperatorRequiresParentheses>(*position_);
  908. } else {
  909. // Check that this operator follows the proper whitespace rules.
  910. DiagnoseOperatorFixity(OperatorFixity::Prefix);
  911. }
  912. auto operator_token = Consume(NextTokenKind());
  913. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  914. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  915. has_errors);
  916. lhs_precedence = *operator_precedence;
  917. }
  918. // Consume a sequence of infix and postfix operators.
  919. while (auto trailing_operator = PrecedenceGroup::ForTrailing(
  920. NextTokenKind(), IsTrailingOperatorInfix())) {
  921. auto [operator_precedence, is_binary] = *trailing_operator;
  922. // FIXME: If this operator is ambiguous with either the ambient precedence
  923. // or the LHS precedence, and there's a variant with a different fixity
  924. // that would work, use that one instead for error recovery.
  925. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  926. OperatorPriority::RightFirst) {
  927. // The precedence rules don't permit this operator in this context. Try
  928. // again in the enclosing expression context.
  929. return lhs;
  930. }
  931. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  932. OperatorPriority::LeftFirst) {
  933. // Either the LHS operator and this operator are ambiguous, or the
  934. // LHS operaor is a unary operator that can't be nested within
  935. // this operator. Either way, parentheses are required.
  936. emitter_.EmitError<OperatorRequiresParentheses>(*position_);
  937. lhs = llvm::None;
  938. } else {
  939. DiagnoseOperatorFixity(is_binary ? OperatorFixity::Infix
  940. : OperatorFixity::Postfix);
  941. }
  942. auto operator_token = Consume(NextTokenKind());
  943. if (is_binary) {
  944. auto rhs = ParseOperatorExpression(operator_precedence);
  945. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  946. /*has_error=*/!lhs || !rhs);
  947. } else {
  948. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  949. /*has_error=*/!lhs);
  950. }
  951. lhs_precedence = operator_precedence;
  952. }
  953. return lhs;
  954. }
  955. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  956. RETURN_IF_STACK_LIMITED(llvm::None);
  957. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  958. }
  959. auto ParseTree::Parser::ParseType() -> llvm::Optional<Node> {
  960. RETURN_IF_STACK_LIMITED(llvm::None);
  961. return ParseOperatorExpression(PrecedenceGroup::ForType());
  962. }
  963. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  964. RETURN_IF_STACK_LIMITED(llvm::None);
  965. TokenizedBuffer::Token start_token = *position_;
  966. auto start = GetSubtreeStartPosition();
  967. bool has_errors = !ParseExpression();
  968. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  969. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  970. has_errors);
  971. }
  972. if (!has_errors) {
  973. emitter_.EmitError<ExpectedSemiAfterExpression>(*position_);
  974. }
  975. if (auto recovery_node =
  976. SkipPastLikelyEnd(start_token, [&](TokenizedBuffer::Token semi) {
  977. return AddNode(ParseNodeKind::ExpressionStatement(), semi, start,
  978. true);
  979. })) {
  980. return recovery_node;
  981. }
  982. // Found junk not even followed by a `;`.
  983. return llvm::None;
  984. }
  985. auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
  986. -> llvm::Optional<Node> {
  987. RETURN_IF_STACK_LIMITED(llvm::None);
  988. // `(` expression `)`
  989. auto start = GetSubtreeStartPosition();
  990. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  991. if (!open_paren) {
  992. emitter_.EmitError<ExpectedParenAfter>(*position_,
  993. {.introducer = introducer});
  994. }
  995. auto expr = ParseExpression();
  996. if (!open_paren) {
  997. // Don't expect a matching closing paren if there wasn't an opening paren.
  998. return llvm::None;
  999. }
  1000. auto close_paren =
  1001. ParseCloseParen(*open_paren, ParseNodeKind::ConditionEnd());
  1002. return AddNode(ParseNodeKind::Condition(), *open_paren, start,
  1003. /*has_error=*/!expr || !close_paren);
  1004. }
  1005. auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
  1006. auto start = GetSubtreeStartPosition();
  1007. auto if_token = Consume(TokenKind::If());
  1008. auto cond = ParseParenCondition(TokenKind::If());
  1009. auto then_case = ParseCodeBlock();
  1010. bool else_has_errors = false;
  1011. if (ConsumeAndAddLeafNodeIf(TokenKind::Else(),
  1012. ParseNodeKind::IfStatementElse())) {
  1013. // 'else if' is permitted as a special case.
  1014. if (NextTokenIs(TokenKind::If())) {
  1015. else_has_errors = !ParseIfStatement();
  1016. } else {
  1017. else_has_errors = !ParseCodeBlock();
  1018. }
  1019. }
  1020. return AddNode(ParseNodeKind::IfStatement(), if_token, start,
  1021. /*has_error=*/!cond || !then_case || else_has_errors);
  1022. }
  1023. auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
  1024. RETURN_IF_STACK_LIMITED(llvm::None);
  1025. auto start = GetSubtreeStartPosition();
  1026. auto while_token = Consume(TokenKind::While());
  1027. auto cond = ParseParenCondition(TokenKind::While());
  1028. auto body = ParseCodeBlock();
  1029. return AddNode(ParseNodeKind::WhileStatement(), while_token, start,
  1030. /*has_error=*/!cond || !body);
  1031. }
  1032. auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
  1033. KeywordStatementArgument argument)
  1034. -> llvm::Optional<Node> {
  1035. RETURN_IF_STACK_LIMITED(llvm::None);
  1036. auto keyword_kind = NextTokenKind();
  1037. CHECK(keyword_kind.IsKeyword());
  1038. auto start = GetSubtreeStartPosition();
  1039. auto keyword = Consume(keyword_kind);
  1040. bool arg_error = false;
  1041. if ((argument == KeywordStatementArgument::Optional &&
  1042. NextTokenKind() != TokenKind::Semi()) ||
  1043. argument == KeywordStatementArgument::Mandatory) {
  1044. arg_error = !ParseExpression();
  1045. }
  1046. auto semi =
  1047. ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
  1048. if (!semi) {
  1049. emitter_.EmitError<ExpectedSemiAfter>(*position_,
  1050. {.preceding = keyword_kind});
  1051. // FIXME: Try to skip to a semicolon to recover.
  1052. }
  1053. return AddNode(kind, keyword, start, /*has_error=*/!semi || arg_error);
  1054. }
  1055. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  1056. RETURN_IF_STACK_LIMITED(llvm::None);
  1057. switch (NextTokenKind()) {
  1058. case TokenKind::Var():
  1059. return ParseVariableDeclaration();
  1060. case TokenKind::If():
  1061. return ParseIfStatement();
  1062. case TokenKind::While():
  1063. return ParseWhileStatement();
  1064. case TokenKind::Continue():
  1065. return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
  1066. KeywordStatementArgument::None);
  1067. case TokenKind::Break():
  1068. return ParseKeywordStatement(ParseNodeKind::BreakStatement(),
  1069. KeywordStatementArgument::None);
  1070. case TokenKind::Return():
  1071. return ParseKeywordStatement(ParseNodeKind::ReturnStatement(),
  1072. KeywordStatementArgument::Optional);
  1073. default:
  1074. // A statement with no introducer token can only be an expression
  1075. // statement.
  1076. return ParseExpressionStatement();
  1077. }
  1078. }
  1079. } // namespace Carbon