parser_impl.cpp 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "common/check.h"
  7. #include "llvm/ADT/Optional.h"
  8. #include "llvm/Support/FormatVariadic.h"
  9. #include "llvm/Support/raw_ostream.h"
  10. #include "toolchain/lexer/token_kind.h"
  11. #include "toolchain/lexer/tokenized_buffer.h"
  12. #include "toolchain/parser/parse_node_kind.h"
  13. #include "toolchain/parser/parse_tree.h"
  14. namespace Carbon {
  15. struct StackLimitExceeded {
  16. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  17. auto Format() -> std::string {
  18. return llvm::formatv("Exceeded recursion limit ({0})",
  19. ParseTree::StackDepthLimit);
  20. }
  21. };
  22. // Manages the parser's stack depth, particularly decrementing on destruction.
  23. // This should only be instantiated through RETURN_IF_STACK_LIMITED.
  24. class ParseTree::Parser::ScopedStackStep {
  25. public:
  26. explicit ScopedStackStep(ParseTree::Parser* parser) : parser_(parser) {
  27. ++parser_->stack_depth_;
  28. }
  29. ~ScopedStackStep() { --parser_->stack_depth_; }
  30. auto VerifyUnderLimit() -> bool {
  31. if (parser_->stack_depth_ >= StackDepthLimit) {
  32. parser_->emitter_.EmitError<StackLimitExceeded>(*parser_->position_);
  33. return false;
  34. }
  35. return true;
  36. }
  37. private:
  38. ParseTree::Parser* parser_;
  39. };
  40. // Encapsulates checking the stack and erroring if needed. This should be called
  41. // at the start of every parse function.
  42. #define RETURN_IF_STACK_LIMITED(error_return_expr) \
  43. ScopedStackStep scoped_stack_step(this); \
  44. if (!scoped_stack_step.VerifyUnderLimit()) { \
  45. return (error_return_expr); \
  46. }
  47. struct UnexpectedTokenInCodeBlock
  48. : SimpleDiagnostic<UnexpectedTokenInCodeBlock> {
  49. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  50. static constexpr llvm::StringLiteral Message =
  51. "Unexpected token in code block.";
  52. };
  53. struct ExpectedFunctionName : SimpleDiagnostic<ExpectedFunctionName> {
  54. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  55. static constexpr llvm::StringLiteral Message =
  56. "Expected function name after `fn` keyword.";
  57. };
  58. struct ExpectedFunctionParams : SimpleDiagnostic<ExpectedFunctionParams> {
  59. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  60. static constexpr llvm::StringLiteral Message =
  61. "Expected `(` after function name.";
  62. };
  63. struct ExpectedFunctionBodyOrSemi
  64. : SimpleDiagnostic<ExpectedFunctionBodyOrSemi> {
  65. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  66. static constexpr llvm::StringLiteral Message =
  67. "Expected function definition or `;` after function declaration.";
  68. };
  69. struct ExpectedVariableName : SimpleDiagnostic<ExpectedVariableName> {
  70. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  71. static constexpr llvm::StringLiteral Message =
  72. "Expected pattern in `var` declaration.";
  73. };
  74. struct ExpectedParameterName : SimpleDiagnostic<ExpectedParameterName> {
  75. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  76. static constexpr llvm::StringLiteral Message =
  77. "Expected parameter declaration.";
  78. };
  79. struct ExpectedStructLiteralField
  80. : SimpleDiagnostic<ExpectedStructLiteralField> {
  81. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  82. auto Format() -> std::string {
  83. std::string result = "Expected ";
  84. if (can_be_type) {
  85. result += "`.field: type`";
  86. }
  87. if (can_be_type && can_be_value) {
  88. result += " or ";
  89. }
  90. if (can_be_value) {
  91. result += "`.field = value`";
  92. }
  93. result += ".";
  94. return result;
  95. }
  96. bool can_be_type;
  97. bool can_be_value;
  98. };
  99. struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
  100. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  101. static constexpr llvm::StringLiteral Message =
  102. "Unrecognized declaration introducer.";
  103. };
  104. struct ExpectedCodeBlock : SimpleDiagnostic<ExpectedCodeBlock> {
  105. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  106. static constexpr llvm::StringLiteral Message = "Expected braced code block.";
  107. };
  108. struct ExpectedExpression : SimpleDiagnostic<ExpectedExpression> {
  109. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  110. static constexpr llvm::StringLiteral Message = "Expected expression.";
  111. };
  112. struct ExpectedParenAfter : SimpleDiagnostic<ExpectedParenAfter> {
  113. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  114. static constexpr const char* Message = "Expected `(` after `{0}`.";
  115. auto Format() -> std::string {
  116. return llvm::formatv(Message, introducer.GetFixedSpelling()).str();
  117. }
  118. TokenKind introducer;
  119. };
  120. struct ExpectedCloseParen : SimpleDiagnostic<ExpectedCloseParen> {
  121. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  122. static constexpr llvm::StringLiteral Message =
  123. "Unexpected tokens before `)`.";
  124. // TODO: Include the location of the matching open paren in the diagnostic.
  125. TokenizedBuffer::Token open_paren;
  126. };
  127. struct ExpectedSemiAfterExpression
  128. : SimpleDiagnostic<ExpectedSemiAfterExpression> {
  129. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  130. static constexpr llvm::StringLiteral Message =
  131. "Expected `;` after expression.";
  132. };
  133. struct ExpectedSemiAfter : SimpleDiagnostic<ExpectedSemiAfter> {
  134. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  135. static constexpr const char* Message = "Expected `;` after `{0}`.";
  136. auto Format() -> std::string {
  137. return llvm::formatv(Message, preceding.GetFixedSpelling()).str();
  138. }
  139. TokenKind preceding;
  140. };
  141. struct ExpectedIdentifierAfterDot
  142. : SimpleDiagnostic<ExpectedIdentifierAfterDot> {
  143. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  144. static constexpr llvm::StringLiteral Message =
  145. "Expected identifier after `.`.";
  146. };
  147. struct UnexpectedTokenAfterListElement
  148. : SimpleDiagnostic<UnexpectedTokenAfterListElement> {
  149. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  150. static constexpr const char* Message = "Expected `,` or `{0}`.";
  151. auto Format() -> std::string {
  152. return llvm::formatv(Message, close.GetFixedSpelling()).str();
  153. }
  154. TokenKind close;
  155. };
  156. struct BinaryOperatorRequiresWhitespace
  157. : SimpleDiagnostic<BinaryOperatorRequiresWhitespace> {
  158. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  159. static constexpr const char* Message =
  160. "Whitespace missing {0} binary operator.";
  161. auto Format() -> std::string {
  162. const char* position = "around";
  163. if (has_leading_space) {
  164. position = "after";
  165. } else if (has_trailing_space) {
  166. position = "before";
  167. }
  168. return llvm::formatv(Message, position);
  169. }
  170. bool has_leading_space;
  171. bool has_trailing_space;
  172. };
  173. struct UnaryOperatorHasWhitespace
  174. : SimpleDiagnostic<UnaryOperatorHasWhitespace> {
  175. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  176. static constexpr const char* Message =
  177. "Whitespace is not allowed {0} this unary operator.";
  178. auto Format() -> std::string {
  179. return llvm::formatv(Message, prefix ? "after" : "before");
  180. }
  181. bool prefix;
  182. };
  183. struct UnaryOperatorRequiresWhitespace
  184. : SimpleDiagnostic<UnaryOperatorRequiresWhitespace> {
  185. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  186. static constexpr const char* Message =
  187. "Whitespace is required {0} this unary operator.";
  188. auto Format() -> std::string {
  189. return llvm::formatv(Message, prefix ? "before" : "after");
  190. }
  191. bool prefix;
  192. };
  193. struct OperatorRequiresParentheses
  194. : SimpleDiagnostic<OperatorRequiresParentheses> {
  195. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  196. static constexpr llvm::StringLiteral Message =
  197. "Parentheses are required to disambiguate operator precedence.";
  198. };
  199. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  200. TokenDiagnosticEmitter& emitter)
  201. : tree_(tree_arg),
  202. tokens_(tokens_arg),
  203. emitter_(emitter),
  204. position_(tokens_.Tokens().begin()),
  205. end_(tokens_.Tokens().end()) {
  206. CHECK(std::find_if(position_, end_,
  207. [&](TokenizedBuffer::Token t) {
  208. return tokens_.GetKind(t) == TokenKind::EndOfFile();
  209. }) != end_)
  210. << "No EndOfFileToken in token buffer.";
  211. }
  212. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  213. TokenDiagnosticEmitter& emitter) -> ParseTree {
  214. ParseTree tree(tokens);
  215. // We expect to have a 1:1 correspondence between tokens and tree nodes, so
  216. // reserve the space we expect to need here to avoid allocation and copying
  217. // overhead.
  218. tree.node_impls_.reserve(tokens.Size());
  219. Parser parser(tree, tokens, emitter);
  220. while (!parser.AtEndOfFile()) {
  221. if (!parser.ParseDeclaration()) {
  222. // We don't have an enclosing parse tree node to mark as erroneous, so
  223. // just mark the tree as a whole.
  224. tree.has_errors_ = true;
  225. }
  226. }
  227. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position_);
  228. CHECK(tree.Verify()) << "Parse tree built but does not verify!";
  229. return tree;
  230. }
  231. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  232. CHECK(kind != TokenKind::EndOfFile()) << "Cannot consume the EOF token!";
  233. CHECK(NextTokenIs(kind)) << "The current token is the wrong kind!";
  234. TokenizedBuffer::Token t = *position_;
  235. ++position_;
  236. CHECK(position_ != end_)
  237. << "Reached end of tokens without finding EOF token.";
  238. return t;
  239. }
  240. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  241. -> llvm::Optional<TokenizedBuffer::Token> {
  242. if (!NextTokenIs(kind)) {
  243. return {};
  244. }
  245. return Consume(kind);
  246. }
  247. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  248. TokenizedBuffer::Token token) -> Node {
  249. Node n(tree_.node_impls_.size());
  250. tree_.node_impls_.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  251. return n;
  252. }
  253. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  254. ParseNodeKind n_kind)
  255. -> llvm::Optional<Node> {
  256. auto t = ConsumeIf(t_kind);
  257. if (!t) {
  258. return {};
  259. }
  260. return AddLeafNode(n_kind, *t);
  261. }
  262. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  263. tree_.node_impls_[n.index_].has_error = true;
  264. tree_.has_errors_ = true;
  265. }
  266. // A marker for the start of a node's subtree.
  267. //
  268. // This is used to track the size of the node's subtree. It can be used
  269. // repeatedly if multiple subtrees start at the same position.
  270. struct ParseTree::Parser::SubtreeStart {
  271. int tree_size;
  272. };
  273. auto ParseTree::Parser::GetSubtreeStartPosition() -> SubtreeStart {
  274. return {static_cast<int>(tree_.node_impls_.size())};
  275. }
  276. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  277. SubtreeStart start, bool has_error) -> Node {
  278. // The size of the subtree is the change in size from when we started this
  279. // subtree to now, but including the node we're about to add.
  280. int tree_stop_size = static_cast<int>(tree_.node_impls_.size()) + 1;
  281. int subtree_size = tree_stop_size - start.tree_size;
  282. Node n(tree_.node_impls_.size());
  283. tree_.node_impls_.push_back(NodeImpl(n_kind, t, subtree_size));
  284. if (has_error) {
  285. MarkNodeError(n);
  286. }
  287. return n;
  288. }
  289. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  290. TokenizedBuffer::Token t = *position_;
  291. TokenKind t_kind = tokens_.GetKind(t);
  292. if (!t_kind.IsOpeningSymbol()) {
  293. return false;
  294. }
  295. SkipTo(tokens_.GetMatchedClosingToken(t));
  296. Consume(t_kind.GetClosingSymbol());
  297. return true;
  298. }
  299. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  300. CHECK(t >= *position_) << "Tried to skip backwards.";
  301. position_ = TokenizedBuffer::TokenIterator(t);
  302. CHECK(position_ != end_) << "Skipped past EOF.";
  303. }
  304. auto ParseTree::Parser::FindNextOf(
  305. std::initializer_list<TokenKind> desired_kinds)
  306. -> llvm::Optional<TokenizedBuffer::Token> {
  307. auto new_position = position_;
  308. while (true) {
  309. TokenizedBuffer::Token token = *new_position;
  310. TokenKind kind = tokens_.GetKind(token);
  311. if (kind.IsOneOf(desired_kinds)) {
  312. return token;
  313. }
  314. // Step to the next token at the current bracketing level.
  315. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  316. // There are no more tokens at this level.
  317. return llvm::None;
  318. } else if (kind.IsOpeningSymbol()) {
  319. new_position =
  320. TokenizedBuffer::TokenIterator(tokens_.GetMatchedClosingToken(token));
  321. // Advance past the closing token.
  322. ++new_position;
  323. } else {
  324. ++new_position;
  325. }
  326. }
  327. }
  328. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root,
  329. SemiHandler on_semi)
  330. -> llvm::Optional<Node> {
  331. if (AtEndOfFile()) {
  332. return llvm::None;
  333. }
  334. TokenizedBuffer::Line root_line = tokens_.GetLine(skip_root);
  335. int root_line_indent = tokens_.GetIndentColumnNumber(root_line);
  336. // We will keep scanning through tokens on the same line as the root or
  337. // lines with greater indentation than root's line.
  338. auto is_same_line_or_indent_greater_than_root =
  339. [&](TokenizedBuffer::Token t) {
  340. TokenizedBuffer::Line l = tokens_.GetLine(t);
  341. if (l == root_line) {
  342. return true;
  343. }
  344. return tokens_.GetIndentColumnNumber(l) > root_line_indent;
  345. };
  346. do {
  347. if (NextTokenKind() == TokenKind::CloseCurlyBrace()) {
  348. // Immediately bail out if we hit an unmatched close curly, this will
  349. // pop us up a level of the syntax grouping.
  350. return llvm::None;
  351. }
  352. // We assume that a semicolon is always intended to be the end of the
  353. // current construct.
  354. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  355. return on_semi(*semi);
  356. }
  357. // Skip over any matching group of tokens_.
  358. if (SkipMatchingGroup()) {
  359. continue;
  360. }
  361. // Otherwise just step forward one token.
  362. Consume(NextTokenKind());
  363. } while (!AtEndOfFile() &&
  364. is_same_line_or_indent_greater_than_root(*position_));
  365. return llvm::None;
  366. }
  367. auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
  368. ParseNodeKind kind)
  369. -> llvm::Optional<Node> {
  370. if (auto close_paren =
  371. ConsumeAndAddLeafNodeIf(TokenKind::CloseParen(), kind)) {
  372. return close_paren;
  373. }
  374. emitter_.EmitError<ExpectedCloseParen>(*position_,
  375. {.open_paren = open_paren});
  376. SkipTo(tokens_.GetMatchedClosingToken(open_paren));
  377. AddLeafNode(kind, Consume(TokenKind::CloseParen()));
  378. return llvm::None;
  379. }
  380. template <typename ListElementParser, typename ListCompletionHandler>
  381. auto ParseTree::Parser::ParseList(TokenKind open, TokenKind close,
  382. ListElementParser list_element_parser,
  383. ParseNodeKind comma_kind,
  384. ListCompletionHandler list_handler,
  385. bool allow_trailing_comma)
  386. -> llvm::Optional<Node> {
  387. // `(` element-list[opt] `)`
  388. //
  389. // element-list ::= element
  390. // ::= element `,` element-list
  391. TokenizedBuffer::Token open_paren = Consume(open);
  392. bool has_errors = false;
  393. bool any_commas = false;
  394. int64_t num_elements = 0;
  395. // Parse elements, if any are specified.
  396. if (!NextTokenIs(close)) {
  397. while (true) {
  398. bool element_error = !list_element_parser();
  399. has_errors |= element_error;
  400. ++num_elements;
  401. if (!NextTokenIsOneOf({close, TokenKind::Comma()})) {
  402. if (!element_error) {
  403. emitter_.EmitError<UnexpectedTokenAfterListElement>(*position_,
  404. {.close = close});
  405. }
  406. has_errors = true;
  407. auto end_of_element = FindNextOf({TokenKind::Comma(), close});
  408. // The lexer guarantees that parentheses are balanced.
  409. CHECK(end_of_element) << "missing matching `)` for `(`";
  410. SkipTo(*end_of_element);
  411. }
  412. if (NextTokenIs(close)) {
  413. break;
  414. }
  415. AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
  416. any_commas = true;
  417. if (allow_trailing_comma && NextTokenIs(close)) {
  418. break;
  419. }
  420. }
  421. }
  422. bool is_single_item = num_elements == 1 && !any_commas;
  423. return list_handler(open_paren, is_single_item, Consume(close), has_errors);
  424. }
  425. auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
  426. RETURN_IF_STACK_LIMITED(llvm::None);
  427. if (NextTokenIs(TokenKind::Identifier()) &&
  428. tokens_.GetKind(*(position_ + 1)) == TokenKind::Colon()) {
  429. // identifier `:` type
  430. auto start = GetSubtreeStartPosition();
  431. AddLeafNode(ParseNodeKind::DeclaredName(),
  432. Consume(TokenKind::Identifier()));
  433. auto colon = Consume(TokenKind::Colon());
  434. auto type = ParseType();
  435. return AddNode(ParseNodeKind::PatternBinding(), colon, start,
  436. /*has_error=*/!type);
  437. }
  438. switch (kind) {
  439. case PatternKind::Parameter:
  440. emitter_.EmitError<ExpectedParameterName>(*position_);
  441. break;
  442. case PatternKind::Variable:
  443. emitter_.EmitError<ExpectedVariableName>(*position_);
  444. break;
  445. }
  446. return llvm::None;
  447. }
  448. auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
  449. RETURN_IF_STACK_LIMITED(llvm::None);
  450. return ParsePattern(PatternKind::Parameter);
  451. }
  452. auto ParseTree::Parser::ParseFunctionSignature() -> bool {
  453. RETURN_IF_STACK_LIMITED(false);
  454. auto start = GetSubtreeStartPosition();
  455. auto params = ParseParenList(
  456. [&] { return ParseFunctionParameter(); },
  457. ParseNodeKind::ParameterListComma(),
  458. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  459. TokenizedBuffer::Token close_paren, bool has_errors) {
  460. AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren);
  461. return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
  462. has_errors);
  463. });
  464. auto start_return_type = GetSubtreeStartPosition();
  465. if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) {
  466. auto return_type = ParseType();
  467. AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type,
  468. /*has_error=*/!return_type);
  469. if (!return_type) {
  470. return false;
  471. }
  472. }
  473. return params.hasValue();
  474. }
  475. auto ParseTree::Parser::ParseCodeBlock() -> llvm::Optional<Node> {
  476. RETURN_IF_STACK_LIMITED(llvm::None);
  477. llvm::Optional<TokenizedBuffer::Token> maybe_open_curly =
  478. ConsumeIf(TokenKind::OpenCurlyBrace());
  479. if (!maybe_open_curly) {
  480. // Recover by parsing a single statement.
  481. emitter_.EmitError<ExpectedCodeBlock>(*position_);
  482. return ParseStatement();
  483. }
  484. TokenizedBuffer::Token open_curly = *maybe_open_curly;
  485. auto start = GetSubtreeStartPosition();
  486. bool has_errors = false;
  487. // Loop over all the different possibly nested elements in the code block.
  488. while (!NextTokenIs(TokenKind::CloseCurlyBrace())) {
  489. if (!ParseStatement()) {
  490. // We detected and diagnosed an error of some kind. We can trivially skip
  491. // to the actual close curly brace from here.
  492. // FIXME: It would be better to skip to the next semicolon, or the next
  493. // token at the start of a line with the same indent as this one.
  494. SkipTo(tokens_.GetMatchedClosingToken(open_curly));
  495. has_errors = true;
  496. break;
  497. }
  498. }
  499. // We always reach here having set our position in the token stream to the
  500. // close curly brace.
  501. AddLeafNode(ParseNodeKind::CodeBlockEnd(),
  502. Consume(TokenKind::CloseCurlyBrace()));
  503. return AddNode(ParseNodeKind::CodeBlock(), open_curly, start, has_errors);
  504. }
  505. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  506. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::FnKeyword());
  507. auto start = GetSubtreeStartPosition();
  508. auto add_error_function_node = [&] {
  509. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  510. start, /*has_error=*/true);
  511. };
  512. RETURN_IF_STACK_LIMITED(add_error_function_node());
  513. auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
  514. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  515. };
  516. auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  517. ParseNodeKind::DeclaredName());
  518. if (!name_n) {
  519. emitter_.EmitError<ExpectedFunctionName>(*position_);
  520. // FIXME: We could change the lexer to allow us to synthesize certain
  521. // kinds of tokens and try to "recover" here, but unclear that this is
  522. // really useful.
  523. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  524. return add_error_function_node();
  525. }
  526. TokenizedBuffer::Token open_paren = *position_;
  527. if (tokens_.GetKind(open_paren) != TokenKind::OpenParen()) {
  528. emitter_.EmitError<ExpectedFunctionParams>(open_paren);
  529. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  530. return add_error_function_node();
  531. }
  532. TokenizedBuffer::Token close_paren =
  533. tokens_.GetMatchedClosingToken(open_paren);
  534. if (!ParseFunctionSignature()) {
  535. // Don't try to parse more of the function declaration, but consume a
  536. // declaration ending semicolon if found (without going to a new line).
  537. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  538. return add_error_function_node();
  539. }
  540. // See if we should parse a definition which is represented as a code block.
  541. if (NextTokenIs(TokenKind::OpenCurlyBrace())) {
  542. if (!ParseCodeBlock()) {
  543. return add_error_function_node();
  544. }
  545. } else if (!ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  546. ParseNodeKind::DeclarationEnd())) {
  547. emitter_.EmitError<ExpectedFunctionBodyOrSemi>(*position_);
  548. if (tokens_.GetLine(*position_) == tokens_.GetLine(close_paren)) {
  549. // Only need to skip if we've not already found a new line.
  550. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  551. }
  552. return add_error_function_node();
  553. }
  554. // Successfully parsed the function, add that node.
  555. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  556. start);
  557. }
  558. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  559. // `var` pattern [= expression] `;`
  560. TokenizedBuffer::Token var_token = Consume(TokenKind::VarKeyword());
  561. auto start = GetSubtreeStartPosition();
  562. RETURN_IF_STACK_LIMITED(AddNode(ParseNodeKind::VariableDeclaration(),
  563. var_token, start,
  564. /*has_error=*/true));
  565. auto pattern = ParsePattern(PatternKind::Variable);
  566. if (!pattern) {
  567. if (auto after_pattern =
  568. FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  569. SkipTo(*after_pattern);
  570. }
  571. }
  572. auto start_init = GetSubtreeStartPosition();
  573. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  574. auto init = ParseExpression();
  575. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  576. /*has_error=*/!init);
  577. }
  578. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  579. ParseNodeKind::DeclarationEnd());
  580. if (!semi) {
  581. emitter_.EmitError<ExpectedSemiAfterExpression>(*position_);
  582. SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
  583. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  584. });
  585. }
  586. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  587. /*has_error=*/!pattern || !semi);
  588. }
  589. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  590. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  591. Consume(TokenKind::Semi()));
  592. }
  593. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  594. RETURN_IF_STACK_LIMITED(llvm::None);
  595. switch (NextTokenKind()) {
  596. case TokenKind::FnKeyword():
  597. return ParseFunctionDeclaration();
  598. case TokenKind::VarKeyword():
  599. return ParseVariableDeclaration();
  600. case TokenKind::Semi():
  601. return ParseEmptyDeclaration();
  602. case TokenKind::EndOfFile():
  603. return llvm::None;
  604. default:
  605. // Errors are handled outside the switch.
  606. break;
  607. }
  608. // We didn't recognize an introducer for a valid declaration.
  609. emitter_.EmitError<UnrecognizedDeclaration>(*position_);
  610. // Skip forward past any end of a declaration we simply didn't understand so
  611. // that we can find the start of the next declaration or the end of a scope.
  612. if (auto found_semi_n =
  613. SkipPastLikelyEnd(*position_, [&](TokenizedBuffer::Token semi) {
  614. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), semi);
  615. })) {
  616. MarkNodeError(*found_semi_n);
  617. return *found_semi_n;
  618. }
  619. // Nothing, not even a semicolon found.
  620. return llvm::None;
  621. }
  622. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  623. RETURN_IF_STACK_LIMITED(llvm::None);
  624. // parenthesized-expression ::= `(` expression `)`
  625. // tuple-literal ::= `(` `)`
  626. // ::= `(` expression `,` [expression-list [`,`]] `)`
  627. //
  628. // Parse the union of these, `(` [expression-list [`,`]] `)`, and work out
  629. // whether it's a tuple or a parenthesized expression afterwards.
  630. auto start = GetSubtreeStartPosition();
  631. return ParseParenList(
  632. [&] { return ParseExpression(); }, ParseNodeKind::TupleLiteralComma(),
  633. [&](TokenizedBuffer::Token open_paren, bool is_single_item,
  634. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  635. AddLeafNode(is_single_item ? ParseNodeKind::ParenExpressionEnd()
  636. : ParseNodeKind::TupleLiteralEnd(),
  637. close_paren);
  638. return AddNode(is_single_item ? ParseNodeKind::ParenExpression()
  639. : ParseNodeKind::TupleLiteral(),
  640. open_paren, start, has_arg_errors);
  641. },
  642. /*allow_trailing_comma=*/true);
  643. }
  644. auto ParseTree::Parser::ParseBraceExpression() -> llvm::Optional<Node> {
  645. RETURN_IF_STACK_LIMITED(llvm::None);
  646. // braced-expression ::= `{` [field-value-list] `}`
  647. // ::= `{` field-type-list `}`
  648. // field-value-list ::= field-value [`,`]
  649. // ::= field-value `,` field-value-list
  650. // field-value ::= `.` identifier `=` expression
  651. // field-type-list ::= field-type [`,`]
  652. // ::= field-type `,` field-type-list
  653. // field-type ::= `.` identifier `:` type
  654. //
  655. // Note that `{` `}` is the first form (an empty struct), but that an empty
  656. // struct value also behaves as an empty struct type.
  657. auto start = GetSubtreeStartPosition();
  658. enum Kind { Unknown, Value, Type };
  659. Kind kind = Unknown;
  660. return ParseList(
  661. TokenKind::OpenCurlyBrace(), TokenKind::CloseCurlyBrace(),
  662. [&]() -> llvm::Optional<Node> {
  663. auto start_elem = GetSubtreeStartPosition();
  664. auto diagnose_invalid_syntax = [&] {
  665. emitter_.EmitError<ExpectedStructLiteralField>(
  666. *position_,
  667. {.can_be_type = kind != Value, .can_be_value = kind != Type});
  668. return llvm::None;
  669. };
  670. if (!NextTokenIs(TokenKind::Period())) {
  671. return diagnose_invalid_syntax();
  672. }
  673. auto designator = ParseDesignatorExpression(
  674. start_elem, ParseNodeKind::StructFieldDesignator(),
  675. /*has_errors=*/false);
  676. if (!designator) {
  677. auto recovery_pos = FindNextOf(
  678. {TokenKind::Equal(), TokenKind::Colon(), TokenKind::Comma()});
  679. if (!recovery_pos ||
  680. tokens_.GetKind(*recovery_pos) == TokenKind::Comma()) {
  681. return llvm::None;
  682. }
  683. SkipTo(*recovery_pos);
  684. }
  685. // Work out the kind of this element
  686. Kind elem_kind =
  687. (NextTokenIs(TokenKind::Equal())
  688. ? Value
  689. : NextTokenIs(TokenKind::Colon()) ? Type : Unknown);
  690. if (elem_kind == Unknown || (kind != Unknown && elem_kind != kind)) {
  691. return diagnose_invalid_syntax();
  692. }
  693. kind = elem_kind;
  694. // Struct type fields and value fields use the same grammar except that
  695. // one has a `:` separator and the other has an `=` separator.
  696. auto equal_or_colon_token =
  697. Consume(kind == Type ? TokenKind::Colon() : TokenKind::Equal());
  698. auto type_or_value = ParseExpression();
  699. return AddNode(kind == Type ? ParseNodeKind::StructFieldType()
  700. : ParseNodeKind::StructFieldValue(),
  701. equal_or_colon_token, start_elem,
  702. /*has_error=*/!designator || !type_or_value);
  703. },
  704. ParseNodeKind::StructComma(),
  705. [&](TokenizedBuffer::Token open_brace, bool /*is_single_item*/,
  706. TokenizedBuffer::Token close_brace, bool has_errors) {
  707. AddLeafNode(ParseNodeKind::StructEnd(), close_brace);
  708. return AddNode(kind == Type ? ParseNodeKind::StructTypeLiteral()
  709. : ParseNodeKind::StructLiteral(),
  710. open_brace, start, has_errors);
  711. },
  712. /*allow_trailing_comma=*/true);
  713. }
  714. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  715. RETURN_IF_STACK_LIMITED(llvm::None);
  716. llvm::Optional<ParseNodeKind> kind;
  717. switch (NextTokenKind()) {
  718. case TokenKind::Identifier():
  719. kind = ParseNodeKind::NameReference();
  720. break;
  721. case TokenKind::IntegerLiteral():
  722. case TokenKind::RealLiteral():
  723. case TokenKind::StringLiteral():
  724. case TokenKind::IntegerTypeLiteral():
  725. case TokenKind::UnsignedIntegerTypeLiteral():
  726. case TokenKind::FloatingPointTypeLiteral():
  727. kind = ParseNodeKind::Literal();
  728. break;
  729. case TokenKind::OpenParen():
  730. return ParseParenExpression();
  731. case TokenKind::OpenCurlyBrace():
  732. return ParseBraceExpression();
  733. default:
  734. emitter_.EmitError<ExpectedExpression>(*position_);
  735. return llvm::None;
  736. }
  737. return AddLeafNode(*kind, Consume(NextTokenKind()));
  738. }
  739. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  740. ParseNodeKind kind,
  741. bool has_errors)
  742. -> llvm::Optional<Node> {
  743. // `.` identifier
  744. auto dot = Consume(TokenKind::Period());
  745. auto name = ConsumeIf(TokenKind::Identifier());
  746. if (name) {
  747. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  748. } else {
  749. emitter_.EmitError<ExpectedIdentifierAfterDot>(*position_);
  750. // If we see a keyword, assume it was intended to be the designated name.
  751. // TODO: Should keywords be valid in designators?
  752. if (NextTokenKind().IsKeyword()) {
  753. name = Consume(NextTokenKind());
  754. auto name_node = AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  755. MarkNodeError(name_node);
  756. } else {
  757. has_errors = true;
  758. }
  759. }
  760. Node result = AddNode(kind, dot, start, has_errors);
  761. return name ? result : llvm::Optional<Node>();
  762. }
  763. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  764. -> llvm::Optional<Node> {
  765. RETURN_IF_STACK_LIMITED(llvm::None);
  766. // `(` expression-list[opt] `)`
  767. //
  768. // expression-list ::= expression
  769. // ::= expression `,` expression-list
  770. return ParseParenList(
  771. [&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(),
  772. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  773. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  774. AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren);
  775. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  776. has_errors || has_arg_errors);
  777. });
  778. }
  779. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  780. RETURN_IF_STACK_LIMITED(llvm::None);
  781. auto start = GetSubtreeStartPosition();
  782. llvm::Optional<Node> expression = ParsePrimaryExpression();
  783. while (true) {
  784. switch (NextTokenKind()) {
  785. case TokenKind::Period():
  786. expression = ParseDesignatorExpression(
  787. start, ParseNodeKind::DesignatorExpression(), !expression);
  788. break;
  789. case TokenKind::OpenParen():
  790. expression = ParseCallExpression(start, !expression);
  791. break;
  792. default: {
  793. return expression;
  794. }
  795. }
  796. }
  797. }
  798. // Determines whether the given token is considered to be the start of an
  799. // operand according to the rules for infix operator parsing.
  800. static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
  801. return kind.IsOneOf({TokenKind::OpenParen(), TokenKind::Identifier(),
  802. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  803. TokenKind::StringLiteral()});
  804. }
  805. // Determines whether the given token is considered to be the end of an operand
  806. // according to the rules for infix operator parsing.
  807. static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
  808. return kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  809. TokenKind::CloseSquareBracket(), TokenKind::Identifier(),
  810. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  811. TokenKind::StringLiteral()});
  812. }
  813. // Determines whether the given token could possibly be the start of an operand.
  814. // This is conservatively correct, and will never incorrectly return `false`,
  815. // but can incorrectly return `true`.
  816. static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
  817. return !kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  818. TokenKind::CloseSquareBracket(), TokenKind::Comma(),
  819. TokenKind::Semi(), TokenKind::Colon()});
  820. }
  821. auto ParseTree::Parser::IsLexicallyValidInfixOperator() -> bool {
  822. CHECK(!AtEndOfFile()) << "Expected an operator token.";
  823. bool leading_space = tokens_.HasLeadingWhitespace(*position_);
  824. bool trailing_space = tokens_.HasTrailingWhitespace(*position_);
  825. // If there's whitespace on both sides, it's an infix operator.
  826. if (leading_space && trailing_space) {
  827. return true;
  828. }
  829. // If there's whitespace on exactly one side, it's not an infix operator.
  830. if (leading_space || trailing_space) {
  831. return false;
  832. }
  833. // Otherwise, for an infix operator, the preceding token must be any close
  834. // bracket, identifier, or literal and the next token must be an open paren,
  835. // identifier, or literal.
  836. if (position_ == tokens_.Tokens().begin() ||
  837. !IsAssumedEndOfOperand(tokens_.GetKind(*(position_ - 1))) ||
  838. !IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  839. return false;
  840. }
  841. return true;
  842. }
  843. auto ParseTree::Parser::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  844. bool is_valid_as_infix = IsLexicallyValidInfixOperator();
  845. if (fixity == OperatorFixity::Infix) {
  846. // Infix operators must satisfy the infix operator rules.
  847. if (!is_valid_as_infix) {
  848. emitter_.EmitError<BinaryOperatorRequiresWhitespace>(
  849. *position_,
  850. {.has_leading_space = tokens_.HasLeadingWhitespace(*position_),
  851. .has_trailing_space = tokens_.HasTrailingWhitespace(*position_)});
  852. }
  853. } else {
  854. bool prefix = fixity == OperatorFixity::Prefix;
  855. // Whitespace is not permitted between a symbolic pre/postfix operator and
  856. // its operand.
  857. if (NextTokenKind().IsSymbol() &&
  858. (prefix ? tokens_.HasTrailingWhitespace(*position_)
  859. : tokens_.HasLeadingWhitespace(*position_))) {
  860. emitter_.EmitError<UnaryOperatorHasWhitespace>(*position_,
  861. {.prefix = prefix});
  862. }
  863. // Pre/postfix operators must not satisfy the infix operator rules.
  864. if (is_valid_as_infix) {
  865. emitter_.EmitError<UnaryOperatorRequiresWhitespace>(*position_,
  866. {.prefix = prefix});
  867. }
  868. }
  869. }
  870. auto ParseTree::Parser::IsTrailingOperatorInfix() -> bool {
  871. if (AtEndOfFile()) {
  872. return false;
  873. }
  874. // An operator that follows the infix operator rules is parsed as
  875. // infix, unless the next token means that it can't possibly be.
  876. if (IsLexicallyValidInfixOperator() &&
  877. IsPossibleStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  878. return true;
  879. }
  880. // A trailing operator with leading whitespace that's not valid as infix is
  881. // not valid at all. If the next token looks like the start of an operand,
  882. // then parse as infix, otherwise as postfix. Either way we'll produce a
  883. // diagnostic later on.
  884. if (tokens_.HasLeadingWhitespace(*position_) &&
  885. IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  886. return true;
  887. }
  888. return false;
  889. }
  890. auto ParseTree::Parser::ParseOperatorExpression(
  891. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  892. RETURN_IF_STACK_LIMITED(llvm::None);
  893. auto start = GetSubtreeStartPosition();
  894. llvm::Optional<Node> lhs;
  895. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  896. // Check for a prefix operator.
  897. if (auto operator_precedence = PrecedenceGroup::ForLeading(NextTokenKind());
  898. !operator_precedence) {
  899. lhs = ParsePostfixExpression();
  900. } else {
  901. if (PrecedenceGroup::GetPriority(ambient_precedence,
  902. *operator_precedence) !=
  903. OperatorPriority::RightFirst) {
  904. // The precedence rules don't permit this prefix operator in this
  905. // context. Diagnose this, but carry on and parse it anyway.
  906. emitter_.EmitError<OperatorRequiresParentheses>(*position_);
  907. } else {
  908. // Check that this operator follows the proper whitespace rules.
  909. DiagnoseOperatorFixity(OperatorFixity::Prefix);
  910. }
  911. auto operator_token = Consume(NextTokenKind());
  912. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  913. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  914. has_errors);
  915. lhs_precedence = *operator_precedence;
  916. }
  917. // Consume a sequence of infix and postfix operators.
  918. while (auto trailing_operator = PrecedenceGroup::ForTrailing(
  919. NextTokenKind(), IsTrailingOperatorInfix())) {
  920. auto [operator_precedence, is_binary] = *trailing_operator;
  921. // FIXME: If this operator is ambiguous with either the ambient precedence
  922. // or the LHS precedence, and there's a variant with a different fixity
  923. // that would work, use that one instead for error recovery.
  924. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  925. OperatorPriority::RightFirst) {
  926. // The precedence rules don't permit this operator in this context. Try
  927. // again in the enclosing expression context.
  928. return lhs;
  929. }
  930. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  931. OperatorPriority::LeftFirst) {
  932. // Either the LHS operator and this operator are ambiguous, or the
  933. // LHS operaor is a unary operator that can't be nested within
  934. // this operator. Either way, parentheses are required.
  935. emitter_.EmitError<OperatorRequiresParentheses>(*position_);
  936. lhs = llvm::None;
  937. } else {
  938. DiagnoseOperatorFixity(is_binary ? OperatorFixity::Infix
  939. : OperatorFixity::Postfix);
  940. }
  941. auto operator_token = Consume(NextTokenKind());
  942. if (is_binary) {
  943. auto rhs = ParseOperatorExpression(operator_precedence);
  944. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  945. /*has_error=*/!lhs || !rhs);
  946. } else {
  947. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  948. /*has_error=*/!lhs);
  949. }
  950. lhs_precedence = operator_precedence;
  951. }
  952. return lhs;
  953. }
  954. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  955. RETURN_IF_STACK_LIMITED(llvm::None);
  956. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  957. }
  958. auto ParseTree::Parser::ParseType() -> llvm::Optional<Node> {
  959. RETURN_IF_STACK_LIMITED(llvm::None);
  960. return ParseOperatorExpression(PrecedenceGroup::ForType());
  961. }
  962. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  963. RETURN_IF_STACK_LIMITED(llvm::None);
  964. TokenizedBuffer::Token start_token = *position_;
  965. auto start = GetSubtreeStartPosition();
  966. bool has_errors = !ParseExpression();
  967. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  968. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  969. has_errors);
  970. }
  971. if (!has_errors) {
  972. emitter_.EmitError<ExpectedSemiAfterExpression>(*position_);
  973. }
  974. if (auto recovery_node =
  975. SkipPastLikelyEnd(start_token, [&](TokenizedBuffer::Token semi) {
  976. return AddNode(ParseNodeKind::ExpressionStatement(), semi, start,
  977. true);
  978. })) {
  979. return recovery_node;
  980. }
  981. // Found junk not even followed by a `;`.
  982. return llvm::None;
  983. }
  984. auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
  985. -> llvm::Optional<Node> {
  986. RETURN_IF_STACK_LIMITED(llvm::None);
  987. // `(` expression `)`
  988. auto start = GetSubtreeStartPosition();
  989. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  990. if (!open_paren) {
  991. emitter_.EmitError<ExpectedParenAfter>(*position_,
  992. {.introducer = introducer});
  993. }
  994. auto expr = ParseExpression();
  995. if (!open_paren) {
  996. // Don't expect a matching closing paren if there wasn't an opening paren.
  997. return llvm::None;
  998. }
  999. auto close_paren =
  1000. ParseCloseParen(*open_paren, ParseNodeKind::ConditionEnd());
  1001. return AddNode(ParseNodeKind::Condition(), *open_paren, start,
  1002. /*has_error=*/!expr || !close_paren);
  1003. }
  1004. auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
  1005. auto start = GetSubtreeStartPosition();
  1006. auto if_token = Consume(TokenKind::IfKeyword());
  1007. auto cond = ParseParenCondition(TokenKind::IfKeyword());
  1008. auto then_case = ParseCodeBlock();
  1009. bool else_has_errors = false;
  1010. if (ConsumeAndAddLeafNodeIf(TokenKind::ElseKeyword(),
  1011. ParseNodeKind::IfStatementElse())) {
  1012. // 'else if' is permitted as a special case.
  1013. if (NextTokenIs(TokenKind::IfKeyword())) {
  1014. else_has_errors = !ParseIfStatement();
  1015. } else {
  1016. else_has_errors = !ParseCodeBlock();
  1017. }
  1018. }
  1019. return AddNode(ParseNodeKind::IfStatement(), if_token, start,
  1020. /*has_error=*/!cond || !then_case || else_has_errors);
  1021. }
  1022. auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
  1023. RETURN_IF_STACK_LIMITED(llvm::None);
  1024. auto start = GetSubtreeStartPosition();
  1025. auto while_token = Consume(TokenKind::WhileKeyword());
  1026. auto cond = ParseParenCondition(TokenKind::WhileKeyword());
  1027. auto body = ParseCodeBlock();
  1028. return AddNode(ParseNodeKind::WhileStatement(), while_token, start,
  1029. /*has_error=*/!cond || !body);
  1030. }
  1031. auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
  1032. KeywordStatementArgument argument)
  1033. -> llvm::Optional<Node> {
  1034. RETURN_IF_STACK_LIMITED(llvm::None);
  1035. auto keyword_kind = NextTokenKind();
  1036. assert(keyword_kind.IsKeyword());
  1037. auto start = GetSubtreeStartPosition();
  1038. auto keyword = Consume(keyword_kind);
  1039. bool arg_error = false;
  1040. if ((argument == KeywordStatementArgument::Optional &&
  1041. NextTokenKind() != TokenKind::Semi()) ||
  1042. argument == KeywordStatementArgument::Mandatory) {
  1043. arg_error = !ParseExpression();
  1044. }
  1045. auto semi =
  1046. ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
  1047. if (!semi) {
  1048. emitter_.EmitError<ExpectedSemiAfter>(*position_,
  1049. {.preceding = keyword_kind});
  1050. // FIXME: Try to skip to a semicolon to recover.
  1051. }
  1052. return AddNode(kind, keyword, start, /*has_error=*/!semi || arg_error);
  1053. }
  1054. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  1055. RETURN_IF_STACK_LIMITED(llvm::None);
  1056. switch (NextTokenKind()) {
  1057. case TokenKind::VarKeyword():
  1058. return ParseVariableDeclaration();
  1059. case TokenKind::IfKeyword():
  1060. return ParseIfStatement();
  1061. case TokenKind::WhileKeyword():
  1062. return ParseWhileStatement();
  1063. case TokenKind::ContinueKeyword():
  1064. return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
  1065. KeywordStatementArgument::None);
  1066. case TokenKind::BreakKeyword():
  1067. return ParseKeywordStatement(ParseNodeKind::BreakStatement(),
  1068. KeywordStatementArgument::None);
  1069. case TokenKind::ReturnKeyword():
  1070. return ParseKeywordStatement(ParseNodeKind::ReturnStatement(),
  1071. KeywordStatementArgument::Optional);
  1072. default:
  1073. // A statement with no introducer token can only be an expression
  1074. // statement.
  1075. return ParseExpressionStatement();
  1076. }
  1077. }
  1078. } // namespace Carbon