parser_impl.cpp 42 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "common/check.h"
  7. #include "llvm/ADT/Optional.h"
  8. #include "llvm/Support/FormatVariadic.h"
  9. #include "llvm/Support/raw_ostream.h"
  10. #include "toolchain/lexer/token_kind.h"
  11. #include "toolchain/lexer/tokenized_buffer.h"
  12. #include "toolchain/parser/parse_node_kind.h"
  13. #include "toolchain/parser/parse_tree.h"
  14. namespace Carbon {
  15. struct UnexpectedTokenInCodeBlock
  16. : SimpleDiagnostic<UnexpectedTokenInCodeBlock> {
  17. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  18. static constexpr llvm::StringLiteral Message =
  19. "Unexpected token in code block.";
  20. };
  21. struct ExpectedFunctionName : SimpleDiagnostic<ExpectedFunctionName> {
  22. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  23. static constexpr llvm::StringLiteral Message =
  24. "Expected function name after `fn` keyword.";
  25. };
  26. struct ExpectedFunctionParams : SimpleDiagnostic<ExpectedFunctionParams> {
  27. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  28. static constexpr llvm::StringLiteral Message =
  29. "Expected `(` after function name.";
  30. };
  31. struct ExpectedFunctionBodyOrSemi
  32. : SimpleDiagnostic<ExpectedFunctionBodyOrSemi> {
  33. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  34. static constexpr llvm::StringLiteral Message =
  35. "Expected function definition or `;` after function declaration.";
  36. };
  37. struct ExpectedVariableName : SimpleDiagnostic<ExpectedVariableName> {
  38. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  39. static constexpr llvm::StringLiteral Message =
  40. "Expected pattern in `var` declaration.";
  41. };
  42. struct ExpectedParameterName : SimpleDiagnostic<ExpectedParameterName> {
  43. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  44. static constexpr llvm::StringLiteral Message =
  45. "Expected parameter declaration.";
  46. };
  47. struct ExpectedStructLiteralField
  48. : SimpleDiagnostic<ExpectedStructLiteralField> {
  49. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  50. auto Format() -> std::string {
  51. std::string result = "Expected ";
  52. if (can_be_type) {
  53. result += "`.field: type`";
  54. }
  55. if (can_be_type && can_be_value) {
  56. result += " or ";
  57. }
  58. if (can_be_value) {
  59. result += "`.field = value`";
  60. }
  61. result += ".";
  62. return result;
  63. }
  64. bool can_be_type;
  65. bool can_be_value;
  66. };
  67. struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
  68. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  69. static constexpr llvm::StringLiteral Message =
  70. "Unrecognized declaration introducer.";
  71. };
  72. struct ExpectedCodeBlock : SimpleDiagnostic<ExpectedCodeBlock> {
  73. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  74. static constexpr llvm::StringLiteral Message = "Expected braced code block.";
  75. };
  76. struct ExpectedExpression : SimpleDiagnostic<ExpectedExpression> {
  77. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  78. static constexpr llvm::StringLiteral Message = "Expected expression.";
  79. };
  80. struct ExpectedParenAfter : SimpleDiagnostic<ExpectedParenAfter> {
  81. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  82. static constexpr const char* Message = "Expected `(` after `{0}`.";
  83. auto Format() -> std::string {
  84. return llvm::formatv(Message, introducer.GetFixedSpelling()).str();
  85. }
  86. TokenKind introducer;
  87. };
  88. struct ExpectedCloseParen : SimpleDiagnostic<ExpectedCloseParen> {
  89. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  90. static constexpr llvm::StringLiteral Message =
  91. "Unexpected tokens before `)`.";
  92. // TODO: Include the location of the matching open paren in the diagnostic.
  93. TokenizedBuffer::Token open_paren;
  94. };
  95. struct ExpectedSemiAfterExpression
  96. : SimpleDiagnostic<ExpectedSemiAfterExpression> {
  97. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  98. static constexpr llvm::StringLiteral Message =
  99. "Expected `;` after expression.";
  100. };
  101. struct ExpectedSemiAfter : SimpleDiagnostic<ExpectedSemiAfter> {
  102. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  103. static constexpr const char* Message = "Expected `;` after `{0}`.";
  104. auto Format() -> std::string {
  105. return llvm::formatv(Message, preceding.GetFixedSpelling()).str();
  106. }
  107. TokenKind preceding;
  108. };
  109. struct ExpectedIdentifierAfterDot
  110. : SimpleDiagnostic<ExpectedIdentifierAfterDot> {
  111. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  112. static constexpr llvm::StringLiteral Message =
  113. "Expected identifier after `.`.";
  114. };
  115. struct UnexpectedTokenAfterListElement
  116. : SimpleDiagnostic<UnexpectedTokenAfterListElement> {
  117. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  118. static constexpr const char* Message = "Expected `,` or `{0}`.";
  119. auto Format() -> std::string {
  120. return llvm::formatv(Message, close.GetFixedSpelling()).str();
  121. }
  122. TokenKind close;
  123. };
  124. struct BinaryOperatorRequiresWhitespace
  125. : SimpleDiagnostic<BinaryOperatorRequiresWhitespace> {
  126. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  127. static constexpr const char* Message =
  128. "Whitespace missing {0} binary operator.";
  129. auto Format() -> std::string {
  130. const char* position = "around";
  131. if (has_leading_space) {
  132. position = "after";
  133. } else if (has_trailing_space) {
  134. position = "before";
  135. }
  136. return llvm::formatv(Message, position);
  137. }
  138. bool has_leading_space;
  139. bool has_trailing_space;
  140. };
  141. struct UnaryOperatorHasWhitespace
  142. : SimpleDiagnostic<UnaryOperatorHasWhitespace> {
  143. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  144. static constexpr const char* Message =
  145. "Whitespace is not allowed {0} this unary operator.";
  146. auto Format() -> std::string {
  147. return llvm::formatv(Message, prefix ? "after" : "before");
  148. }
  149. bool prefix;
  150. };
  151. struct UnaryOperatorRequiresWhitespace
  152. : SimpleDiagnostic<UnaryOperatorRequiresWhitespace> {
  153. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  154. static constexpr const char* Message =
  155. "Whitespace is required {0} this unary operator.";
  156. auto Format() -> std::string {
  157. return llvm::formatv(Message, prefix ? "before" : "after");
  158. }
  159. bool prefix;
  160. };
  161. struct OperatorRequiresParentheses
  162. : SimpleDiagnostic<OperatorRequiresParentheses> {
  163. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  164. static constexpr llvm::StringLiteral Message =
  165. "Parentheses are required to disambiguate operator precedence.";
  166. };
  167. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  168. TokenDiagnosticEmitter& emitter)
  169. : tree_(tree_arg),
  170. tokens_(tokens_arg),
  171. emitter_(emitter),
  172. position_(tokens_.Tokens().begin()),
  173. end_(tokens_.Tokens().end()) {
  174. CHECK(std::find_if(position_, end_,
  175. [&](TokenizedBuffer::Token t) {
  176. return tokens_.GetKind(t) == TokenKind::EndOfFile();
  177. }) != end_)
  178. << "No EndOfFileToken in token buffer.";
  179. }
  180. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  181. TokenDiagnosticEmitter& emitter) -> ParseTree {
  182. ParseTree tree(tokens);
  183. // We expect to have a 1:1 correspondence between tokens and tree nodes, so
  184. // reserve the space we expect to need here to avoid allocation and copying
  185. // overhead.
  186. tree.node_impls_.reserve(tokens.Size());
  187. Parser parser(tree, tokens, emitter);
  188. while (!parser.AtEndOfFile()) {
  189. if (!parser.ParseDeclaration()) {
  190. // We don't have an enclosing parse tree node to mark as erroneous, so
  191. // just mark the tree as a whole.
  192. tree.has_errors_ = true;
  193. }
  194. }
  195. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position_);
  196. CHECK(tree.Verify()) << "Parse tree built but does not verify!";
  197. return tree;
  198. }
  199. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  200. CHECK(kind != TokenKind::EndOfFile()) << "Cannot consume the EOF token!";
  201. CHECK(NextTokenIs(kind)) << "The current token is the wrong kind!";
  202. TokenizedBuffer::Token t = *position_;
  203. ++position_;
  204. CHECK(position_ != end_)
  205. << "Reached end of tokens without finding EOF token.";
  206. return t;
  207. }
  208. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  209. -> llvm::Optional<TokenizedBuffer::Token> {
  210. if (!NextTokenIs(kind)) {
  211. return {};
  212. }
  213. return Consume(kind);
  214. }
  215. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  216. TokenizedBuffer::Token token) -> Node {
  217. Node n(tree_.node_impls_.size());
  218. tree_.node_impls_.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  219. return n;
  220. }
  221. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  222. ParseNodeKind n_kind)
  223. -> llvm::Optional<Node> {
  224. auto t = ConsumeIf(t_kind);
  225. if (!t) {
  226. return {};
  227. }
  228. return AddLeafNode(n_kind, *t);
  229. }
  230. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  231. tree_.node_impls_[n.index_].has_error = true;
  232. tree_.has_errors_ = true;
  233. }
  234. // A marker for the start of a node's subtree.
  235. //
  236. // This is used to track the size of the node's subtree. It can be used
  237. // repeatedly if multiple subtrees start at the same position.
  238. struct ParseTree::Parser::SubtreeStart {
  239. int tree_size;
  240. };
  241. auto ParseTree::Parser::GetSubtreeStartPosition() -> SubtreeStart {
  242. return {static_cast<int>(tree_.node_impls_.size())};
  243. }
  244. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  245. SubtreeStart start, bool has_error) -> Node {
  246. // The size of the subtree is the change in size from when we started this
  247. // subtree to now, but including the node we're about to add.
  248. int tree_stop_size = static_cast<int>(tree_.node_impls_.size()) + 1;
  249. int subtree_size = tree_stop_size - start.tree_size;
  250. Node n(tree_.node_impls_.size());
  251. tree_.node_impls_.push_back(NodeImpl(n_kind, t, subtree_size));
  252. if (has_error) {
  253. MarkNodeError(n);
  254. }
  255. return n;
  256. }
  257. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  258. TokenizedBuffer::Token t = *position_;
  259. TokenKind t_kind = tokens_.GetKind(t);
  260. if (!t_kind.IsOpeningSymbol()) {
  261. return false;
  262. }
  263. SkipTo(tokens_.GetMatchedClosingToken(t));
  264. Consume(t_kind.GetClosingSymbol());
  265. return true;
  266. }
  267. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  268. CHECK(t >= *position_) << "Tried to skip backwards.";
  269. position_ = TokenizedBuffer::TokenIterator(t);
  270. CHECK(position_ != end_) << "Skipped past EOF.";
  271. }
  272. auto ParseTree::Parser::FindNextOf(
  273. std::initializer_list<TokenKind> desired_kinds)
  274. -> llvm::Optional<TokenizedBuffer::Token> {
  275. auto new_position = position_;
  276. while (true) {
  277. TokenizedBuffer::Token token = *new_position;
  278. TokenKind kind = tokens_.GetKind(token);
  279. if (kind.IsOneOf(desired_kinds)) {
  280. return token;
  281. }
  282. // Step to the next token at the current bracketing level.
  283. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  284. // There are no more tokens at this level.
  285. return llvm::None;
  286. } else if (kind.IsOpeningSymbol()) {
  287. new_position =
  288. TokenizedBuffer::TokenIterator(tokens_.GetMatchedClosingToken(token));
  289. // Advance past the closing token.
  290. ++new_position;
  291. } else {
  292. ++new_position;
  293. }
  294. }
  295. }
  296. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root,
  297. SemiHandler on_semi)
  298. -> llvm::Optional<Node> {
  299. if (AtEndOfFile()) {
  300. return llvm::None;
  301. }
  302. TokenizedBuffer::Line root_line = tokens_.GetLine(skip_root);
  303. int root_line_indent = tokens_.GetIndentColumnNumber(root_line);
  304. // We will keep scanning through tokens on the same line as the root or
  305. // lines with greater indentation than root's line.
  306. auto is_same_line_or_indent_greater_than_root =
  307. [&](TokenizedBuffer::Token t) {
  308. TokenizedBuffer::Line l = tokens_.GetLine(t);
  309. if (l == root_line) {
  310. return true;
  311. }
  312. return tokens_.GetIndentColumnNumber(l) > root_line_indent;
  313. };
  314. do {
  315. if (NextTokenKind() == TokenKind::CloseCurlyBrace()) {
  316. // Immediately bail out if we hit an unmatched close curly, this will
  317. // pop us up a level of the syntax grouping.
  318. return llvm::None;
  319. }
  320. // We assume that a semicolon is always intended to be the end of the
  321. // current construct.
  322. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  323. return on_semi(*semi);
  324. }
  325. // Skip over any matching group of tokens_.
  326. if (SkipMatchingGroup()) {
  327. continue;
  328. }
  329. // Otherwise just step forward one token.
  330. Consume(NextTokenKind());
  331. } while (!AtEndOfFile() &&
  332. is_same_line_or_indent_greater_than_root(*position_));
  333. return llvm::None;
  334. }
  335. auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
  336. ParseNodeKind kind)
  337. -> llvm::Optional<Node> {
  338. if (auto close_paren =
  339. ConsumeAndAddLeafNodeIf(TokenKind::CloseParen(), kind)) {
  340. return close_paren;
  341. }
  342. emitter_.EmitError<ExpectedCloseParen>(*position_,
  343. {.open_paren = open_paren});
  344. SkipTo(tokens_.GetMatchedClosingToken(open_paren));
  345. AddLeafNode(kind, Consume(TokenKind::CloseParen()));
  346. return llvm::None;
  347. }
  348. template <typename ListElementParser, typename ListCompletionHandler>
  349. auto ParseTree::Parser::ParseList(TokenKind open, TokenKind close,
  350. ListElementParser list_element_parser,
  351. ParseNodeKind comma_kind,
  352. ListCompletionHandler list_handler,
  353. bool allow_trailing_comma)
  354. -> llvm::Optional<Node> {
  355. // `(` element-list[opt] `)`
  356. //
  357. // element-list ::= element
  358. // ::= element `,` element-list
  359. TokenizedBuffer::Token open_paren = Consume(open);
  360. bool has_errors = false;
  361. bool any_commas = false;
  362. int64_t num_elements = 0;
  363. // Parse elements, if any are specified.
  364. if (!NextTokenIs(close)) {
  365. while (true) {
  366. bool element_error = !list_element_parser();
  367. has_errors |= element_error;
  368. ++num_elements;
  369. if (!NextTokenIsOneOf({close, TokenKind::Comma()})) {
  370. if (!element_error) {
  371. emitter_.EmitError<UnexpectedTokenAfterListElement>(*position_,
  372. {.close = close});
  373. }
  374. has_errors = true;
  375. auto end_of_element = FindNextOf({TokenKind::Comma(), close});
  376. // The lexer guarantees that parentheses are balanced.
  377. CHECK(end_of_element) << "missing matching `)` for `(`";
  378. SkipTo(*end_of_element);
  379. }
  380. if (NextTokenIs(close)) {
  381. break;
  382. }
  383. AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
  384. any_commas = true;
  385. if (allow_trailing_comma && NextTokenIs(close)) {
  386. break;
  387. }
  388. }
  389. }
  390. bool is_single_item = num_elements == 1 && !any_commas;
  391. return list_handler(open_paren, is_single_item, Consume(close), has_errors);
  392. }
  393. auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
  394. if (NextTokenIs(TokenKind::Identifier()) &&
  395. tokens_.GetKind(*(position_ + 1)) == TokenKind::Colon()) {
  396. // identifier `:` type
  397. auto start = GetSubtreeStartPosition();
  398. AddLeafNode(ParseNodeKind::DeclaredName(),
  399. Consume(TokenKind::Identifier()));
  400. auto colon = Consume(TokenKind::Colon());
  401. auto type = ParseType();
  402. return AddNode(ParseNodeKind::PatternBinding(), colon, start,
  403. /*has_error=*/!type);
  404. }
  405. switch (kind) {
  406. case PatternKind::Parameter:
  407. emitter_.EmitError<ExpectedParameterName>(*position_);
  408. break;
  409. case PatternKind::Variable:
  410. emitter_.EmitError<ExpectedVariableName>(*position_);
  411. break;
  412. }
  413. return llvm::None;
  414. }
  415. auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
  416. return ParsePattern(PatternKind::Parameter);
  417. }
  418. auto ParseTree::Parser::ParseFunctionSignature() -> bool {
  419. auto start = GetSubtreeStartPosition();
  420. auto params = ParseParenList(
  421. [&] { return ParseFunctionParameter(); },
  422. ParseNodeKind::ParameterListComma(),
  423. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  424. TokenizedBuffer::Token close_paren, bool has_errors) {
  425. AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren);
  426. return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
  427. has_errors);
  428. });
  429. auto start_return_type = GetSubtreeStartPosition();
  430. if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) {
  431. auto return_type = ParseType();
  432. AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type,
  433. /*has_error=*/!return_type);
  434. if (!return_type) {
  435. return false;
  436. }
  437. }
  438. return params.hasValue();
  439. }
  440. auto ParseTree::Parser::ParseCodeBlock() -> llvm::Optional<Node> {
  441. llvm::Optional<TokenizedBuffer::Token> maybe_open_curly =
  442. ConsumeIf(TokenKind::OpenCurlyBrace());
  443. if (!maybe_open_curly) {
  444. // Recover by parsing a single statement.
  445. emitter_.EmitError<ExpectedCodeBlock>(*position_);
  446. return ParseStatement();
  447. }
  448. TokenizedBuffer::Token open_curly = *maybe_open_curly;
  449. auto start = GetSubtreeStartPosition();
  450. bool has_errors = false;
  451. // Loop over all the different possibly nested elements in the code block.
  452. while (!NextTokenIs(TokenKind::CloseCurlyBrace())) {
  453. if (!ParseStatement()) {
  454. // We detected and diagnosed an error of some kind. We can trivially skip
  455. // to the actual close curly brace from here.
  456. // FIXME: It would be better to skip to the next semicolon, or the next
  457. // token at the start of a line with the same indent as this one.
  458. SkipTo(tokens_.GetMatchedClosingToken(open_curly));
  459. has_errors = true;
  460. break;
  461. }
  462. }
  463. // We always reach here having set our position in the token stream to the
  464. // close curly brace.
  465. AddLeafNode(ParseNodeKind::CodeBlockEnd(),
  466. Consume(TokenKind::CloseCurlyBrace()));
  467. return AddNode(ParseNodeKind::CodeBlock(), open_curly, start, has_errors);
  468. }
  469. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  470. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::FnKeyword());
  471. auto start = GetSubtreeStartPosition();
  472. auto add_error_function_node = [&] {
  473. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  474. start, /*has_error=*/true);
  475. };
  476. auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
  477. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  478. };
  479. auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  480. ParseNodeKind::DeclaredName());
  481. if (!name_n) {
  482. emitter_.EmitError<ExpectedFunctionName>(*position_);
  483. // FIXME: We could change the lexer to allow us to synthesize certain
  484. // kinds of tokens and try to "recover" here, but unclear that this is
  485. // really useful.
  486. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  487. return add_error_function_node();
  488. }
  489. TokenizedBuffer::Token open_paren = *position_;
  490. if (tokens_.GetKind(open_paren) != TokenKind::OpenParen()) {
  491. emitter_.EmitError<ExpectedFunctionParams>(open_paren);
  492. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  493. return add_error_function_node();
  494. }
  495. TokenizedBuffer::Token close_paren =
  496. tokens_.GetMatchedClosingToken(open_paren);
  497. if (!ParseFunctionSignature()) {
  498. // Don't try to parse more of the function declaration, but consume a
  499. // declaration ending semicolon if found (without going to a new line).
  500. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  501. return add_error_function_node();
  502. }
  503. // See if we should parse a definition which is represented as a code block.
  504. if (NextTokenIs(TokenKind::OpenCurlyBrace())) {
  505. if (!ParseCodeBlock()) {
  506. return add_error_function_node();
  507. }
  508. } else if (!ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  509. ParseNodeKind::DeclarationEnd())) {
  510. emitter_.EmitError<ExpectedFunctionBodyOrSemi>(*position_);
  511. if (tokens_.GetLine(*position_) == tokens_.GetLine(close_paren)) {
  512. // Only need to skip if we've not already found a new line.
  513. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  514. }
  515. return add_error_function_node();
  516. }
  517. // Successfully parsed the function, add that node.
  518. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  519. start);
  520. }
  521. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  522. // `var` pattern [= expression] `;`
  523. TokenizedBuffer::Token var_token = Consume(TokenKind::VarKeyword());
  524. auto start = GetSubtreeStartPosition();
  525. auto pattern = ParsePattern(PatternKind::Variable);
  526. if (!pattern) {
  527. if (auto after_pattern =
  528. FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  529. SkipTo(*after_pattern);
  530. }
  531. }
  532. auto start_init = GetSubtreeStartPosition();
  533. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  534. auto init = ParseExpression();
  535. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  536. /*has_error=*/!init);
  537. }
  538. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  539. ParseNodeKind::DeclarationEnd());
  540. if (!semi) {
  541. emitter_.EmitError<ExpectedSemiAfterExpression>(*position_);
  542. SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
  543. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  544. });
  545. }
  546. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  547. /*has_error=*/!pattern || !semi);
  548. }
  549. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  550. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  551. Consume(TokenKind::Semi()));
  552. }
  553. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  554. switch (NextTokenKind()) {
  555. case TokenKind::FnKeyword():
  556. return ParseFunctionDeclaration();
  557. case TokenKind::VarKeyword():
  558. return ParseVariableDeclaration();
  559. case TokenKind::Semi():
  560. return ParseEmptyDeclaration();
  561. case TokenKind::EndOfFile():
  562. return llvm::None;
  563. default:
  564. // Errors are handled outside the switch.
  565. break;
  566. }
  567. // We didn't recognize an introducer for a valid declaration.
  568. emitter_.EmitError<UnrecognizedDeclaration>(*position_);
  569. // Skip forward past any end of a declaration we simply didn't understand so
  570. // that we can find the start of the next declaration or the end of a scope.
  571. if (auto found_semi_n =
  572. SkipPastLikelyEnd(*position_, [&](TokenizedBuffer::Token semi) {
  573. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), semi);
  574. })) {
  575. MarkNodeError(*found_semi_n);
  576. return *found_semi_n;
  577. }
  578. // Nothing, not even a semicolon found.
  579. return llvm::None;
  580. }
  581. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  582. // parenthesized-expression ::= `(` expression `)`
  583. // tuple-literal ::= `(` `)`
  584. // ::= `(` expression `,` [expression-list [`,`]] `)`
  585. //
  586. // Parse the union of these, `(` [expression-list [`,`]] `)`, and work out
  587. // whether it's a tuple or a parenthesized expression afterwards.
  588. auto start = GetSubtreeStartPosition();
  589. return ParseParenList(
  590. [&] { return ParseExpression(); }, ParseNodeKind::TupleLiteralComma(),
  591. [&](TokenizedBuffer::Token open_paren, bool is_single_item,
  592. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  593. AddLeafNode(is_single_item ? ParseNodeKind::ParenExpressionEnd()
  594. : ParseNodeKind::TupleLiteralEnd(),
  595. close_paren);
  596. return AddNode(is_single_item ? ParseNodeKind::ParenExpression()
  597. : ParseNodeKind::TupleLiteral(),
  598. open_paren, start, has_arg_errors);
  599. },
  600. /*allow_trailing_comma=*/true);
  601. }
  602. auto ParseTree::Parser::ParseBraceExpression() -> llvm::Optional<Node> {
  603. // braced-expression ::= `{` [field-value-list] `}`
  604. // ::= `{` field-type-list `}`
  605. // field-value-list ::= field-value [`,`]
  606. // ::= field-value `,` field-value-list
  607. // field-value ::= `.` identifier `=` expression
  608. // field-type-list ::= field-type [`,`]
  609. // ::= field-type `,` field-type-list
  610. // field-type ::= `.` identifier `:` type
  611. //
  612. // Note that `{` `}` is the first form (an empty struct), but that an empty
  613. // struct value also behaves as an empty struct type.
  614. auto start = GetSubtreeStartPosition();
  615. enum Kind { Unknown, Value, Type };
  616. Kind kind = Unknown;
  617. return ParseList(
  618. TokenKind::OpenCurlyBrace(), TokenKind::CloseCurlyBrace(),
  619. [&]() -> llvm::Optional<Node> {
  620. auto start_elem = GetSubtreeStartPosition();
  621. auto diagnose_invalid_syntax = [&] {
  622. emitter_.EmitError<ExpectedStructLiteralField>(
  623. *position_,
  624. {.can_be_type = kind != Value, .can_be_value = kind != Type});
  625. return llvm::None;
  626. };
  627. if (!NextTokenIs(TokenKind::Period())) {
  628. return diagnose_invalid_syntax();
  629. }
  630. auto designator = ParseDesignatorExpression(
  631. start_elem, ParseNodeKind::StructFieldDesignator(),
  632. /*has_errors=*/false);
  633. if (!designator) {
  634. auto recovery_pos = FindNextOf(
  635. {TokenKind::Equal(), TokenKind::Colon(), TokenKind::Comma()});
  636. if (!recovery_pos ||
  637. tokens_.GetKind(*recovery_pos) == TokenKind::Comma()) {
  638. return llvm::None;
  639. }
  640. SkipTo(*recovery_pos);
  641. }
  642. // Work out the kind of this element
  643. Kind elem_kind =
  644. (NextTokenIs(TokenKind::Equal())
  645. ? Value
  646. : NextTokenIs(TokenKind::Colon()) ? Type : Unknown);
  647. if (elem_kind == Unknown || (kind != Unknown && elem_kind != kind)) {
  648. return diagnose_invalid_syntax();
  649. }
  650. kind = elem_kind;
  651. // Struct type fields and value fields use the same grammar except that
  652. // one has a `:` separator and the other has an `=` separator.
  653. auto equal_or_colon_token =
  654. Consume(kind == Type ? TokenKind::Colon() : TokenKind::Equal());
  655. auto type_or_value = ParseExpression();
  656. return AddNode(kind == Type ? ParseNodeKind::StructFieldType()
  657. : ParseNodeKind::StructFieldValue(),
  658. equal_or_colon_token, start_elem,
  659. /*has_error=*/!designator || !type_or_value);
  660. },
  661. ParseNodeKind::StructComma(),
  662. [&](TokenizedBuffer::Token open_brace, bool /*is_single_item*/,
  663. TokenizedBuffer::Token close_brace, bool has_errors) {
  664. AddLeafNode(ParseNodeKind::StructEnd(), close_brace);
  665. return AddNode(kind == Type ? ParseNodeKind::StructTypeLiteral()
  666. : ParseNodeKind::StructLiteral(),
  667. open_brace, start, has_errors);
  668. },
  669. /*allow_trailing_comma=*/true);
  670. }
  671. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  672. llvm::Optional<ParseNodeKind> kind;
  673. switch (NextTokenKind()) {
  674. case TokenKind::Identifier():
  675. kind = ParseNodeKind::NameReference();
  676. break;
  677. case TokenKind::IntegerLiteral():
  678. case TokenKind::RealLiteral():
  679. case TokenKind::StringLiteral():
  680. case TokenKind::IntegerTypeLiteral():
  681. case TokenKind::UnsignedIntegerTypeLiteral():
  682. case TokenKind::FloatingPointTypeLiteral():
  683. kind = ParseNodeKind::Literal();
  684. break;
  685. case TokenKind::OpenParen():
  686. return ParseParenExpression();
  687. case TokenKind::OpenCurlyBrace():
  688. return ParseBraceExpression();
  689. default:
  690. emitter_.EmitError<ExpectedExpression>(*position_);
  691. return llvm::None;
  692. }
  693. return AddLeafNode(*kind, Consume(NextTokenKind()));
  694. }
  695. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  696. ParseNodeKind kind,
  697. bool has_errors)
  698. -> llvm::Optional<Node> {
  699. // `.` identifier
  700. auto dot = Consume(TokenKind::Period());
  701. auto name = ConsumeIf(TokenKind::Identifier());
  702. if (name) {
  703. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  704. } else {
  705. emitter_.EmitError<ExpectedIdentifierAfterDot>(*position_);
  706. // If we see a keyword, assume it was intended to be the designated name.
  707. // TODO: Should keywords be valid in designators?
  708. if (NextTokenKind().IsKeyword()) {
  709. name = Consume(NextTokenKind());
  710. auto name_node = AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  711. MarkNodeError(name_node);
  712. } else {
  713. has_errors = true;
  714. }
  715. }
  716. Node result = AddNode(kind, dot, start, has_errors);
  717. return name ? result : llvm::Optional<Node>();
  718. }
  719. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  720. -> llvm::Optional<Node> {
  721. // `(` expression-list[opt] `)`
  722. //
  723. // expression-list ::= expression
  724. // ::= expression `,` expression-list
  725. return ParseParenList(
  726. [&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(),
  727. [&](TokenizedBuffer::Token open_paren, bool /*is_single_item*/,
  728. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  729. AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren);
  730. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  731. has_errors || has_arg_errors);
  732. });
  733. }
  734. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  735. auto start = GetSubtreeStartPosition();
  736. llvm::Optional<Node> expression = ParsePrimaryExpression();
  737. while (true) {
  738. switch (NextTokenKind()) {
  739. case TokenKind::Period():
  740. expression = ParseDesignatorExpression(
  741. start, ParseNodeKind::DesignatorExpression(), !expression);
  742. break;
  743. case TokenKind::OpenParen():
  744. expression = ParseCallExpression(start, !expression);
  745. break;
  746. default: {
  747. return expression;
  748. }
  749. }
  750. }
  751. }
  752. // Determines whether the given token is considered to be the start of an
  753. // operand according to the rules for infix operator parsing.
  754. static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
  755. return kind.IsOneOf({TokenKind::OpenParen(), TokenKind::Identifier(),
  756. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  757. TokenKind::StringLiteral()});
  758. }
  759. // Determines whether the given token is considered to be the end of an operand
  760. // according to the rules for infix operator parsing.
  761. static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
  762. return kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  763. TokenKind::CloseSquareBracket(), TokenKind::Identifier(),
  764. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  765. TokenKind::StringLiteral()});
  766. }
  767. // Determines whether the given token could possibly be the start of an operand.
  768. // This is conservatively correct, and will never incorrectly return `false`,
  769. // but can incorrectly return `true`.
  770. static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
  771. return !kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  772. TokenKind::CloseSquareBracket(), TokenKind::Comma(),
  773. TokenKind::Semi(), TokenKind::Colon()});
  774. }
  775. auto ParseTree::Parser::IsLexicallyValidInfixOperator() -> bool {
  776. CHECK(!AtEndOfFile()) << "Expected an operator token.";
  777. bool leading_space = tokens_.HasLeadingWhitespace(*position_);
  778. bool trailing_space = tokens_.HasTrailingWhitespace(*position_);
  779. // If there's whitespace on both sides, it's an infix operator.
  780. if (leading_space && trailing_space) {
  781. return true;
  782. }
  783. // If there's whitespace on exactly one side, it's not an infix operator.
  784. if (leading_space || trailing_space) {
  785. return false;
  786. }
  787. // Otherwise, for an infix operator, the preceding token must be any close
  788. // bracket, identifier, or literal and the next token must be an open paren,
  789. // identifier, or literal.
  790. if (position_ == tokens_.Tokens().begin() ||
  791. !IsAssumedEndOfOperand(tokens_.GetKind(*(position_ - 1))) ||
  792. !IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  793. return false;
  794. }
  795. return true;
  796. }
  797. auto ParseTree::Parser::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  798. bool is_valid_as_infix = IsLexicallyValidInfixOperator();
  799. if (fixity == OperatorFixity::Infix) {
  800. // Infix operators must satisfy the infix operator rules.
  801. if (!is_valid_as_infix) {
  802. emitter_.EmitError<BinaryOperatorRequiresWhitespace>(
  803. *position_,
  804. {.has_leading_space = tokens_.HasLeadingWhitespace(*position_),
  805. .has_trailing_space = tokens_.HasTrailingWhitespace(*position_)});
  806. }
  807. } else {
  808. bool prefix = fixity == OperatorFixity::Prefix;
  809. // Whitespace is not permitted between a symbolic pre/postfix operator and
  810. // its operand.
  811. if (NextTokenKind().IsSymbol() &&
  812. (prefix ? tokens_.HasTrailingWhitespace(*position_)
  813. : tokens_.HasLeadingWhitespace(*position_))) {
  814. emitter_.EmitError<UnaryOperatorHasWhitespace>(*position_,
  815. {.prefix = prefix});
  816. }
  817. // Pre/postfix operators must not satisfy the infix operator rules.
  818. if (is_valid_as_infix) {
  819. emitter_.EmitError<UnaryOperatorRequiresWhitespace>(*position_,
  820. {.prefix = prefix});
  821. }
  822. }
  823. }
  824. auto ParseTree::Parser::IsTrailingOperatorInfix() -> bool {
  825. if (AtEndOfFile()) {
  826. return false;
  827. }
  828. // An operator that follows the infix operator rules is parsed as
  829. // infix, unless the next token means that it can't possibly be.
  830. if (IsLexicallyValidInfixOperator() &&
  831. IsPossibleStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  832. return true;
  833. }
  834. // A trailing operator with leading whitespace that's not valid as infix is
  835. // not valid at all. If the next token looks like the start of an operand,
  836. // then parse as infix, otherwise as postfix. Either way we'll produce a
  837. // diagnostic later on.
  838. if (tokens_.HasLeadingWhitespace(*position_) &&
  839. IsAssumedStartOfOperand(tokens_.GetKind(*(position_ + 1)))) {
  840. return true;
  841. }
  842. return false;
  843. }
  844. auto ParseTree::Parser::ParseOperatorExpression(
  845. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  846. auto start = GetSubtreeStartPosition();
  847. llvm::Optional<Node> lhs;
  848. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  849. // Check for a prefix operator.
  850. if (auto operator_precedence = PrecedenceGroup::ForLeading(NextTokenKind());
  851. !operator_precedence) {
  852. lhs = ParsePostfixExpression();
  853. } else {
  854. if (PrecedenceGroup::GetPriority(ambient_precedence,
  855. *operator_precedence) !=
  856. OperatorPriority::RightFirst) {
  857. // The precedence rules don't permit this prefix operator in this
  858. // context. Diagnose this, but carry on and parse it anyway.
  859. emitter_.EmitError<OperatorRequiresParentheses>(*position_);
  860. } else {
  861. // Check that this operator follows the proper whitespace rules.
  862. DiagnoseOperatorFixity(OperatorFixity::Prefix);
  863. }
  864. auto operator_token = Consume(NextTokenKind());
  865. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  866. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  867. has_errors);
  868. lhs_precedence = *operator_precedence;
  869. }
  870. // Consume a sequence of infix and postfix operators.
  871. while (auto trailing_operator = PrecedenceGroup::ForTrailing(
  872. NextTokenKind(), IsTrailingOperatorInfix())) {
  873. auto [operator_precedence, is_binary] = *trailing_operator;
  874. // FIXME: If this operator is ambiguous with either the ambient precedence
  875. // or the LHS precedence, and there's a variant with a different fixity
  876. // that would work, use that one instead for error recovery.
  877. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  878. OperatorPriority::RightFirst) {
  879. // The precedence rules don't permit this operator in this context. Try
  880. // again in the enclosing expression context.
  881. return lhs;
  882. }
  883. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  884. OperatorPriority::LeftFirst) {
  885. // Either the LHS operator and this operator are ambiguous, or the
  886. // LHS operaor is a unary operator that can't be nested within
  887. // this operator. Either way, parentheses are required.
  888. emitter_.EmitError<OperatorRequiresParentheses>(*position_);
  889. lhs = llvm::None;
  890. } else {
  891. DiagnoseOperatorFixity(is_binary ? OperatorFixity::Infix
  892. : OperatorFixity::Postfix);
  893. }
  894. auto operator_token = Consume(NextTokenKind());
  895. if (is_binary) {
  896. auto rhs = ParseOperatorExpression(operator_precedence);
  897. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  898. /*has_error=*/!lhs || !rhs);
  899. } else {
  900. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  901. /*has_error=*/!lhs);
  902. }
  903. lhs_precedence = operator_precedence;
  904. }
  905. return lhs;
  906. }
  907. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  908. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  909. }
  910. auto ParseTree::Parser::ParseType() -> llvm::Optional<Node> {
  911. return ParseOperatorExpression(PrecedenceGroup::ForType());
  912. }
  913. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  914. TokenizedBuffer::Token start_token = *position_;
  915. auto start = GetSubtreeStartPosition();
  916. bool has_errors = !ParseExpression();
  917. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  918. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  919. has_errors);
  920. }
  921. if (!has_errors) {
  922. emitter_.EmitError<ExpectedSemiAfterExpression>(*position_);
  923. }
  924. if (auto recovery_node =
  925. SkipPastLikelyEnd(start_token, [&](TokenizedBuffer::Token semi) {
  926. return AddNode(ParseNodeKind::ExpressionStatement(), semi, start,
  927. true);
  928. })) {
  929. return recovery_node;
  930. }
  931. // Found junk not even followed by a `;`.
  932. return llvm::None;
  933. }
  934. auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
  935. -> llvm::Optional<Node> {
  936. // `(` expression `)`
  937. auto start = GetSubtreeStartPosition();
  938. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  939. if (!open_paren) {
  940. emitter_.EmitError<ExpectedParenAfter>(*position_,
  941. {.introducer = introducer});
  942. }
  943. auto expr = ParseExpression();
  944. if (!open_paren) {
  945. // Don't expect a matching closing paren if there wasn't an opening paren.
  946. return llvm::None;
  947. }
  948. auto close_paren =
  949. ParseCloseParen(*open_paren, ParseNodeKind::ConditionEnd());
  950. return AddNode(ParseNodeKind::Condition(), *open_paren, start,
  951. /*has_error=*/!expr || !close_paren);
  952. }
  953. auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
  954. auto start = GetSubtreeStartPosition();
  955. auto if_token = Consume(TokenKind::IfKeyword());
  956. auto cond = ParseParenCondition(TokenKind::IfKeyword());
  957. auto then_case = ParseCodeBlock();
  958. bool else_has_errors = false;
  959. if (ConsumeAndAddLeafNodeIf(TokenKind::ElseKeyword(),
  960. ParseNodeKind::IfStatementElse())) {
  961. // 'else if' is permitted as a special case.
  962. if (NextTokenIs(TokenKind::IfKeyword())) {
  963. else_has_errors = !ParseIfStatement();
  964. } else {
  965. else_has_errors = !ParseCodeBlock();
  966. }
  967. }
  968. return AddNode(ParseNodeKind::IfStatement(), if_token, start,
  969. /*has_error=*/!cond || !then_case || else_has_errors);
  970. }
  971. auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
  972. auto start = GetSubtreeStartPosition();
  973. auto while_token = Consume(TokenKind::WhileKeyword());
  974. auto cond = ParseParenCondition(TokenKind::WhileKeyword());
  975. auto body = ParseCodeBlock();
  976. return AddNode(ParseNodeKind::WhileStatement(), while_token, start,
  977. /*has_error=*/!cond || !body);
  978. }
  979. auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
  980. KeywordStatementArgument argument)
  981. -> llvm::Optional<Node> {
  982. auto keyword_kind = NextTokenKind();
  983. assert(keyword_kind.IsKeyword());
  984. auto start = GetSubtreeStartPosition();
  985. auto keyword = Consume(keyword_kind);
  986. bool arg_error = false;
  987. if ((argument == KeywordStatementArgument::Optional &&
  988. NextTokenKind() != TokenKind::Semi()) ||
  989. argument == KeywordStatementArgument::Mandatory) {
  990. arg_error = !ParseExpression();
  991. }
  992. auto semi =
  993. ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
  994. if (!semi) {
  995. emitter_.EmitError<ExpectedSemiAfter>(*position_,
  996. {.preceding = keyword_kind});
  997. // FIXME: Try to skip to a semicolon to recover.
  998. }
  999. return AddNode(kind, keyword, start, /*has_error=*/!semi || arg_error);
  1000. }
  1001. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  1002. switch (NextTokenKind()) {
  1003. case TokenKind::VarKeyword():
  1004. return ParseVariableDeclaration();
  1005. case TokenKind::IfKeyword():
  1006. return ParseIfStatement();
  1007. case TokenKind::WhileKeyword():
  1008. return ParseWhileStatement();
  1009. case TokenKind::ContinueKeyword():
  1010. return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
  1011. KeywordStatementArgument::None);
  1012. case TokenKind::BreakKeyword():
  1013. return ParseKeywordStatement(ParseNodeKind::BreakStatement(),
  1014. KeywordStatementArgument::None);
  1015. case TokenKind::ReturnKeyword():
  1016. return ParseKeywordStatement(ParseNodeKind::ReturnStatement(),
  1017. KeywordStatementArgument::Optional);
  1018. default:
  1019. // A statement with no introducer token can only be an expression
  1020. // statement.
  1021. return ParseExpressionStatement();
  1022. }
  1023. }
  1024. } // namespace Carbon