parser_impl.cpp 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "lexer/token_kind.h"
  7. #include "lexer/tokenized_buffer.h"
  8. #include "llvm/ADT/Optional.h"
  9. #include "llvm/Support/FormatVariadic.h"
  10. #include "llvm/Support/raw_ostream.h"
  11. #include "parser/parse_node_kind.h"
  12. #include "parser/parse_tree.h"
  13. namespace Carbon {
  14. struct UnexpectedTokenInCodeBlock
  15. : SimpleDiagnostic<UnexpectedTokenInCodeBlock> {
  16. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  17. static constexpr llvm::StringLiteral Message =
  18. "Unexpected token in code block.";
  19. };
  20. struct ExpectedFunctionName : SimpleDiagnostic<ExpectedFunctionName> {
  21. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  22. static constexpr llvm::StringLiteral Message =
  23. "Expected function name after `fn` keyword.";
  24. };
  25. struct ExpectedFunctionParams : SimpleDiagnostic<ExpectedFunctionParams> {
  26. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  27. static constexpr llvm::StringLiteral Message =
  28. "Expected `(` after function name.";
  29. };
  30. struct ExpectedFunctionBodyOrSemi
  31. : SimpleDiagnostic<ExpectedFunctionBodyOrSemi> {
  32. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  33. static constexpr llvm::StringLiteral Message =
  34. "Expected function definition or `;` after function declaration.";
  35. };
  36. struct ExpectedVariableName : SimpleDiagnostic<ExpectedVariableName> {
  37. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  38. static constexpr llvm::StringLiteral Message =
  39. "Expected variable name after type in `var` declaration.";
  40. };
  41. struct ExpectedParameterName : SimpleDiagnostic<ExpectedParameterName> {
  42. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  43. static constexpr llvm::StringLiteral Message =
  44. "Expected parameter name after type in parameter declaration.";
  45. };
  46. struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
  47. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  48. static constexpr llvm::StringLiteral Message =
  49. "Unrecognized declaration introducer.";
  50. };
  51. struct ExpectedExpression : SimpleDiagnostic<ExpectedExpression> {
  52. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  53. static constexpr llvm::StringLiteral Message = "Expected expression.";
  54. };
  55. struct ExpectedParenAfter : SimpleDiagnostic<ExpectedParenAfter> {
  56. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  57. static constexpr const char* Message = "Expected `(` after `{0}`.";
  58. TokenKind introducer;
  59. auto Format() -> std::string {
  60. return llvm::formatv(Message, introducer.GetFixedSpelling()).str();
  61. }
  62. };
  63. struct ExpectedCloseParen : SimpleDiagnostic<ExpectedCloseParen> {
  64. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  65. static constexpr llvm::StringLiteral Message =
  66. "Unexpected tokens before `)`.";
  67. // TODO: Include the location of the matching open paren in the diagnostic.
  68. TokenizedBuffer::Token open_paren;
  69. };
  70. struct ExpectedSemiAfterExpression
  71. : SimpleDiagnostic<ExpectedSemiAfterExpression> {
  72. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  73. static constexpr llvm::StringLiteral Message =
  74. "Expected `;` after expression.";
  75. };
  76. struct ExpectedSemiAfter : SimpleDiagnostic<ExpectedSemiAfter> {
  77. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  78. static constexpr const char* Message = "Expected `;` after `{0}`.";
  79. TokenKind preceding;
  80. auto Format() -> std::string {
  81. return llvm::formatv(Message, preceding.GetFixedSpelling()).str();
  82. }
  83. };
  84. struct ExpectedIdentifierAfterDot
  85. : SimpleDiagnostic<ExpectedIdentifierAfterDot> {
  86. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  87. static constexpr llvm::StringLiteral Message =
  88. "Expected identifier after `.`.";
  89. };
  90. struct UnexpectedTokenAfterListElement
  91. : SimpleDiagnostic<UnexpectedTokenAfterListElement> {
  92. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  93. static constexpr llvm::StringLiteral Message = "Expected `,` or `)`.";
  94. };
  95. struct OperatorRequiresParentheses
  96. : SimpleDiagnostic<OperatorRequiresParentheses> {
  97. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  98. static constexpr llvm::StringLiteral Message =
  99. "Parentheses are required to disambiguate operator precedence.";
  100. };
  101. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  102. TokenDiagnosticEmitter& emitter)
  103. : tree(tree_arg),
  104. tokens(tokens_arg),
  105. emitter(emitter),
  106. position(tokens.Tokens().begin()),
  107. end(tokens.Tokens().end()) {
  108. assert(std::find_if(position, end,
  109. [&](TokenizedBuffer::Token t) {
  110. return tokens.GetKind(t) == TokenKind::EndOfFile();
  111. }) != end &&
  112. "No EndOfFileToken in token buffer.");
  113. }
  114. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  115. TokenDiagnosticEmitter& emitter) -> ParseTree {
  116. ParseTree tree(tokens);
  117. // We expect to have a 1:1 correspondence between tokens and tree nodes, so
  118. // reserve the space we expect to need here to avoid allocation and copying
  119. // overhead.
  120. tree.node_impls.reserve(tokens.Size());
  121. Parser parser(tree, tokens, emitter);
  122. while (!parser.AtEndOfFile()) {
  123. if (!parser.ParseDeclaration()) {
  124. // We don't have an enclosing parse tree node to mark as erroneous, so
  125. // just mark the tree as a whole.
  126. tree.has_errors = true;
  127. }
  128. }
  129. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position);
  130. assert(tree.Verify() && "Parse tree built but does not verify!");
  131. return tree;
  132. }
  133. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  134. TokenizedBuffer::Token t = *position;
  135. assert(kind != TokenKind::EndOfFile() && "Cannot consume the EOF token!");
  136. assert(tokens.GetKind(t) == kind && "The current token is the wrong kind!");
  137. ++position;
  138. assert(position != end && "Reached end of tokens without finding EOF token.");
  139. return t;
  140. }
  141. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  142. -> llvm::Optional<TokenizedBuffer::Token> {
  143. if (tokens.GetKind(*position) != kind) {
  144. return {};
  145. }
  146. return Consume(kind);
  147. }
  148. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  149. TokenizedBuffer::Token token) -> Node {
  150. Node n(tree.node_impls.size());
  151. tree.node_impls.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  152. return n;
  153. }
  154. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  155. ParseNodeKind n_kind)
  156. -> llvm::Optional<Node> {
  157. auto t = ConsumeIf(t_kind);
  158. if (!t) {
  159. return {};
  160. }
  161. return AddLeafNode(n_kind, *t);
  162. }
  163. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  164. tree.node_impls[n.index].has_error = true;
  165. tree.has_errors = true;
  166. }
  167. // A marker for the start of a node's subtree.
  168. //
  169. // This is used to track the size of the node's subtree. It can be used
  170. // repeatedly if multiple subtrees start at the same position.
  171. struct ParseTree::Parser::SubtreeStart {
  172. int tree_size;
  173. };
  174. auto ParseTree::Parser::StartSubtree() -> SubtreeStart {
  175. return {static_cast<int>(tree.node_impls.size())};
  176. }
  177. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  178. SubtreeStart start, bool has_error) -> Node {
  179. // The size of the subtree is the change in size from when we started this
  180. // subtree to now, but including the node we're about to add.
  181. int tree_stop_size = static_cast<int>(tree.node_impls.size()) + 1;
  182. int subtree_size = tree_stop_size - start.tree_size;
  183. Node n(tree.node_impls.size());
  184. tree.node_impls.push_back(NodeImpl(n_kind, t, subtree_size));
  185. if (has_error) {
  186. MarkNodeError(n);
  187. }
  188. return n;
  189. }
  190. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  191. TokenizedBuffer::Token t = *position;
  192. TokenKind t_kind = tokens.GetKind(t);
  193. if (!t_kind.IsOpeningSymbol()) {
  194. return false;
  195. }
  196. SkipTo(tokens.GetMatchedClosingToken(t));
  197. Consume(t_kind.GetClosingSymbol());
  198. return true;
  199. }
  200. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  201. assert(t >= *position && "Tried to skip backwards.");
  202. position = TokenizedBuffer::TokenIterator(t);
  203. assert(position != end && "Skipped past EOF.");
  204. }
  205. auto ParseTree::Parser::FindNextOf(
  206. std::initializer_list<TokenKind> desired_kinds)
  207. -> llvm::Optional<TokenizedBuffer::Token> {
  208. auto new_position = position;
  209. while (true) {
  210. TokenizedBuffer::Token token = *new_position;
  211. TokenKind kind = tokens.GetKind(token);
  212. for (TokenKind desired_kind : desired_kinds) {
  213. if (kind == desired_kind) {
  214. return token;
  215. }
  216. }
  217. // Step to the next token at the current bracketing level.
  218. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  219. // There are no more tokens at this level.
  220. return llvm::None;
  221. } else if (kind.IsOpeningSymbol()) {
  222. new_position =
  223. TokenizedBuffer::TokenIterator(tokens.GetMatchedClosingToken(token));
  224. } else {
  225. ++new_position;
  226. }
  227. }
  228. }
  229. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root,
  230. SemiHandler on_semi)
  231. -> llvm::Optional<Node> {
  232. if (AtEndOfFile()) {
  233. return llvm::None;
  234. }
  235. TokenizedBuffer::Line root_line = tokens.GetLine(skip_root);
  236. int root_line_indent = tokens.GetIndentColumnNumber(root_line);
  237. // We will keep scanning through tokens on the same line as the root or
  238. // lines with greater indentation than root's line.
  239. auto is_same_line_or_indent_greater_than_root =
  240. [&](TokenizedBuffer::Token t) {
  241. TokenizedBuffer::Line l = tokens.GetLine(t);
  242. if (l == root_line) {
  243. return true;
  244. }
  245. return tokens.GetIndentColumnNumber(l) > root_line_indent;
  246. };
  247. do {
  248. TokenKind current_kind = tokens.GetKind(*position);
  249. if (current_kind == TokenKind::CloseCurlyBrace()) {
  250. // Immediately bail out if we hit an unmatched close curly, this will
  251. // pop us up a level of the syntax grouping.
  252. return llvm::None;
  253. }
  254. // We assume that a semicolon is always intended to be the end of the
  255. // current construct.
  256. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  257. return on_semi(*semi);
  258. }
  259. // Skip over any matching group of tokens.
  260. if (SkipMatchingGroup()) {
  261. continue;
  262. }
  263. // Otherwise just step forward one token.
  264. Consume(current_kind);
  265. } while (!AtEndOfFile() &&
  266. is_same_line_or_indent_greater_than_root(*position));
  267. return llvm::None;
  268. }
  269. auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
  270. ParseNodeKind kind)
  271. -> llvm::Optional<Node> {
  272. if (auto close_paren =
  273. ConsumeAndAddLeafNodeIf(TokenKind::CloseParen(), kind)) {
  274. return close_paren;
  275. }
  276. emitter.EmitError<ExpectedCloseParen>(*position, {.open_paren = open_paren});
  277. SkipTo(tokens.GetMatchedClosingToken(open_paren));
  278. AddLeafNode(kind, Consume(TokenKind::CloseParen()));
  279. return llvm::None;
  280. }
  281. template <typename ListElementParser, typename ListCompletionHandler>
  282. auto ParseTree::Parser::ParseParenList(ListElementParser list_element_parser,
  283. ParseNodeKind comma_kind,
  284. ListCompletionHandler list_handler)
  285. -> llvm::Optional<Node> {
  286. // `(` element-list[opt] `)`
  287. //
  288. // element-list ::= element
  289. // ::= element `,` element-list
  290. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  291. bool has_errors = false;
  292. // Parse elements, if any are specified.
  293. if (tokens.GetKind(*position) != TokenKind::CloseParen()) {
  294. while (true) {
  295. bool element_error = !list_element_parser();
  296. has_errors |= element_error;
  297. TokenKind kind = tokens.GetKind(*position);
  298. if (kind != TokenKind::CloseParen() && kind != TokenKind::Comma()) {
  299. if (!element_error) {
  300. emitter.EmitError<UnexpectedTokenAfterListElement>(*position);
  301. }
  302. has_errors = true;
  303. auto end_of_element =
  304. FindNextOf({TokenKind::Comma(), TokenKind::CloseParen()});
  305. // The lexer guarantees that parentheses are balanced.
  306. assert(end_of_element && "missing matching `)` for `(`");
  307. SkipTo(*end_of_element);
  308. }
  309. if (tokens.GetKind(*position) == TokenKind::CloseParen()) {
  310. break;
  311. }
  312. assert(tokens.GetKind(*position) == TokenKind::Comma());
  313. AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
  314. }
  315. }
  316. return list_handler(open_paren, Consume(TokenKind::CloseParen()), has_errors);
  317. }
  318. auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
  319. // A parameter is of the form
  320. // type identifier
  321. auto start = StartSubtree();
  322. auto type = ParseType();
  323. // FIXME: We can't use DeclaredName here because we need to use the
  324. // identifier token as the root token in the parameter node.
  325. auto name = ConsumeIf(TokenKind::Identifier());
  326. if (!name) {
  327. emitter.EmitError<ExpectedParameterName>(*position);
  328. return llvm::None;
  329. }
  330. return AddNode(ParseNodeKind::ParameterDeclaration(), *name, start,
  331. /*has_error=*/!type);
  332. }
  333. auto ParseTree::Parser::ParseFunctionSignature() -> bool {
  334. auto start = StartSubtree();
  335. auto params = ParseParenList(
  336. [&] { return ParseFunctionParameter(); },
  337. ParseNodeKind::ParameterListComma(),
  338. [&](TokenizedBuffer::Token open_paren, TokenizedBuffer::Token close_paren,
  339. bool has_errors) {
  340. AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren);
  341. return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
  342. has_errors);
  343. });
  344. auto start_return_type = StartSubtree();
  345. if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) {
  346. auto return_type = ParseType();
  347. AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type,
  348. /*has_error=*/!return_type);
  349. if (!return_type) {
  350. return false;
  351. }
  352. }
  353. return params.hasValue();
  354. }
  355. auto ParseTree::Parser::ParseCodeBlock() -> Node {
  356. TokenizedBuffer::Token open_curly = Consume(TokenKind::OpenCurlyBrace());
  357. auto start = StartSubtree();
  358. bool has_errors = false;
  359. // Loop over all the different possibly nested elements in the code block.
  360. while (tokens.GetKind(*position) != TokenKind::CloseCurlyBrace()) {
  361. if (!ParseStatement()) {
  362. // We detected and diagnosed an error of some kind. We can trivially skip
  363. // to the actual close curly brace from here.
  364. // FIXME: It would be better to skip to the next semicolon, or the next
  365. // token at the start of a line with the same indent as this one.
  366. SkipTo(tokens.GetMatchedClosingToken(open_curly));
  367. has_errors = true;
  368. break;
  369. }
  370. }
  371. // We always reach here having set our position in the token stream to the
  372. // close curly brace.
  373. AddLeafNode(ParseNodeKind::CodeBlockEnd(),
  374. Consume(TokenKind::CloseCurlyBrace()));
  375. return AddNode(ParseNodeKind::CodeBlock(), open_curly, start, has_errors);
  376. }
  377. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  378. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::FnKeyword());
  379. auto start = StartSubtree();
  380. auto add_error_function_node = [&] {
  381. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  382. start, /*has_error=*/true);
  383. };
  384. auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
  385. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  386. };
  387. auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  388. ParseNodeKind::DeclaredName());
  389. if (!name_n) {
  390. emitter.EmitError<ExpectedFunctionName>(*position);
  391. // FIXME: We could change the lexer to allow us to synthesize certain
  392. // kinds of tokens and try to "recover" here, but unclear that this is
  393. // really useful.
  394. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  395. return add_error_function_node();
  396. }
  397. TokenizedBuffer::Token open_paren = *position;
  398. if (tokens.GetKind(open_paren) != TokenKind::OpenParen()) {
  399. emitter.EmitError<ExpectedFunctionParams>(open_paren);
  400. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  401. return add_error_function_node();
  402. }
  403. TokenizedBuffer::Token close_paren =
  404. tokens.GetMatchedClosingToken(open_paren);
  405. if (!ParseFunctionSignature()) {
  406. // Don't try to parse more of the function declaration, but consume a
  407. // declaration ending semicolon if found (without going to a new line).
  408. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  409. return add_error_function_node();
  410. }
  411. // See if we should parse a definition which is represented as a code block.
  412. if (tokens.GetKind(*position) == TokenKind::OpenCurlyBrace()) {
  413. ParseCodeBlock();
  414. } else if (!ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  415. ParseNodeKind::DeclarationEnd())) {
  416. emitter.EmitError<ExpectedFunctionBodyOrSemi>(*position);
  417. if (tokens.GetLine(*position) == tokens.GetLine(close_paren)) {
  418. // Only need to skip if we've not already found a new line.
  419. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  420. }
  421. return add_error_function_node();
  422. }
  423. // Successfully parsed the function, add that node.
  424. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  425. start);
  426. }
  427. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  428. // `var` expression identifier [= expression] `;`
  429. TokenizedBuffer::Token var_token = Consume(TokenKind::VarKeyword());
  430. auto start = StartSubtree();
  431. auto type = ParseType();
  432. auto name = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  433. ParseNodeKind::DeclaredName());
  434. if (!name) {
  435. emitter.EmitError<ExpectedVariableName>(*position);
  436. if (auto after_name = FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  437. SkipTo(*after_name);
  438. }
  439. }
  440. auto start_init = StartSubtree();
  441. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  442. auto init = ParseExpression();
  443. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  444. /*has_error=*/!init);
  445. }
  446. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  447. ParseNodeKind::DeclarationEnd());
  448. if (!semi) {
  449. SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
  450. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  451. });
  452. }
  453. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  454. /*has_error=*/!type || !name || !semi);
  455. }
  456. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  457. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  458. Consume(TokenKind::Semi()));
  459. }
  460. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  461. TokenizedBuffer::Token t = *position;
  462. switch (tokens.GetKind(t)) {
  463. case TokenKind::FnKeyword():
  464. return ParseFunctionDeclaration();
  465. case TokenKind::VarKeyword():
  466. return ParseVariableDeclaration();
  467. case TokenKind::Semi():
  468. return ParseEmptyDeclaration();
  469. case TokenKind::EndOfFile():
  470. return llvm::None;
  471. default:
  472. // Errors are handled outside the switch.
  473. break;
  474. }
  475. // We didn't recognize an introducer for a valid declaration.
  476. emitter.EmitError<UnrecognizedDeclaration>(t);
  477. // Skip forward past any end of a declaration we simply didn't understand so
  478. // that we can find the start of the next declaration or the end of a scope.
  479. if (auto found_semi_n =
  480. SkipPastLikelyEnd(t, [&](TokenizedBuffer::Token semi) {
  481. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), semi);
  482. })) {
  483. MarkNodeError(*found_semi_n);
  484. return *found_semi_n;
  485. }
  486. // Nothing, not even a semicolon found.
  487. return llvm::None;
  488. }
  489. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  490. // `(` expression `)`
  491. auto start = StartSubtree();
  492. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  493. // TODO: If the next token is a close paren, build an empty tuple literal.
  494. auto expr = ParseExpression();
  495. // TODO: If the next token is a comma, build a tuple literal.
  496. auto close_paren =
  497. ParseCloseParen(open_paren, ParseNodeKind::ParenExpressionEnd());
  498. return AddNode(ParseNodeKind::ParenExpression(), open_paren, start,
  499. /*has_errors=*/!expr || !close_paren);
  500. }
  501. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  502. TokenizedBuffer::Token t = *position;
  503. TokenKind token_kind = tokens.GetKind(t);
  504. llvm::Optional<ParseNodeKind> kind;
  505. switch (token_kind) {
  506. case TokenKind::Identifier():
  507. kind = ParseNodeKind::NameReference();
  508. break;
  509. case TokenKind::IntegerLiteral():
  510. case TokenKind::RealLiteral():
  511. case TokenKind::StringLiteral():
  512. kind = ParseNodeKind::Literal();
  513. break;
  514. case TokenKind::OpenParen():
  515. return ParseParenExpression();
  516. default:
  517. emitter.EmitError<ExpectedExpression>(t);
  518. return llvm::None;
  519. }
  520. return AddLeafNode(*kind, Consume(token_kind));
  521. }
  522. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  523. bool has_errors)
  524. -> llvm::Optional<Node> {
  525. // `.` identifier
  526. auto dot = Consume(TokenKind::Period());
  527. auto name = ConsumeIf(TokenKind::Identifier());
  528. if (name) {
  529. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  530. } else {
  531. // If we see a keyword, assume it was intended to be the designated name.
  532. // TODO: Should keywords be valid in designators?
  533. if (tokens.GetKind(*position).IsKeyword()) {
  534. Consume(tokens.GetKind(*position));
  535. }
  536. emitter.EmitError<ExpectedIdentifierAfterDot>(*position);
  537. has_errors = true;
  538. }
  539. return AddNode(ParseNodeKind::DesignatorExpression(), dot, start, has_errors);
  540. }
  541. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  542. -> llvm::Optional<Node> {
  543. // `(` expression-list[opt] `)`
  544. //
  545. // expression-list ::= expression
  546. // ::= expression `,` expression-list
  547. return ParseParenList(
  548. [&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(),
  549. [&](TokenizedBuffer::Token open_paren, TokenizedBuffer::Token close_paren,
  550. bool has_arg_errors) {
  551. AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren);
  552. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  553. has_errors || has_arg_errors);
  554. });
  555. }
  556. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  557. auto start = StartSubtree();
  558. llvm::Optional<Node> expression = ParsePrimaryExpression();
  559. while (true) {
  560. switch (tokens.GetKind(*position)) {
  561. case TokenKind::Period():
  562. expression = ParseDesignatorExpression(start, !expression);
  563. break;
  564. case TokenKind::OpenParen():
  565. expression = ParseCallExpression(start, !expression);
  566. break;
  567. default: {
  568. return expression;
  569. }
  570. }
  571. }
  572. }
  573. auto ParseTree::Parser::ParseOperatorExpression(
  574. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  575. auto start = StartSubtree();
  576. llvm::Optional<Node> lhs;
  577. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  578. // Check for a prefix operator.
  579. if (auto operator_precedence =
  580. PrecedenceGroup::ForLeading(tokens.GetKind(*position));
  581. !operator_precedence) {
  582. lhs = ParsePostfixExpression();
  583. } else {
  584. if (PrecedenceGroup::GetPriority(ambient_precedence,
  585. *operator_precedence) !=
  586. OperatorPriority::RightFirst) {
  587. // The precedence rules don't permit this prefix operator in this
  588. // context. Diagnose this, but carry on and parse it anyway.
  589. emitter.EmitError<OperatorRequiresParentheses>(*position);
  590. }
  591. auto operator_token = Consume(tokens.GetKind(*position));
  592. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  593. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  594. has_errors);
  595. lhs_precedence = *operator_precedence;
  596. }
  597. // Consume a sequence of infix and postfix operators.
  598. while (auto trailing_operator =
  599. PrecedenceGroup::ForTrailing(tokens.GetKind(*position))) {
  600. auto [operator_precedence, is_binary] = *trailing_operator;
  601. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  602. OperatorPriority::RightFirst) {
  603. // The precedence rules don't permit this operator in this context. Try
  604. // again in the enclosing expression context.
  605. return lhs;
  606. }
  607. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  608. OperatorPriority::LeftFirst) {
  609. // Either the LHS operator and this operator are ambiguous, or the
  610. // LHS operaor is a unary operator that can't be nested within
  611. // this operator. Either way, parentheses are required.
  612. emitter.EmitError<OperatorRequiresParentheses>(*position);
  613. lhs = llvm::None;
  614. }
  615. auto operator_token = Consume(tokens.GetKind(*position));
  616. if (is_binary) {
  617. auto rhs = ParseOperatorExpression(operator_precedence);
  618. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  619. /*has_error=*/!lhs || !rhs);
  620. } else {
  621. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  622. /*has_error=*/!lhs);
  623. }
  624. lhs_precedence = operator_precedence;
  625. }
  626. return lhs;
  627. }
  628. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  629. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  630. }
  631. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  632. TokenizedBuffer::Token start_token = *position;
  633. auto start = StartSubtree();
  634. bool has_errors = !ParseExpression();
  635. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  636. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  637. has_errors);
  638. }
  639. if (!has_errors) {
  640. emitter.EmitError<ExpectedSemiAfterExpression>(*position);
  641. }
  642. if (auto recovery_node =
  643. SkipPastLikelyEnd(start_token, [&](TokenizedBuffer::Token semi) {
  644. return AddNode(ParseNodeKind::ExpressionStatement(), semi, start,
  645. true);
  646. })) {
  647. return recovery_node;
  648. }
  649. // Found junk not even followed by a `;`.
  650. return llvm::None;
  651. }
  652. auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
  653. -> llvm::Optional<Node> {
  654. // `(` expression `)`
  655. auto start = StartSubtree();
  656. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  657. if (!open_paren) {
  658. emitter.EmitError<ExpectedParenAfter>(*position,
  659. {.introducer = introducer});
  660. }
  661. auto expr = ParseExpression();
  662. if (!open_paren) {
  663. // Don't expect a matching closing paren if there wasn't an opening paren.
  664. return llvm::None;
  665. }
  666. auto close_paren =
  667. ParseCloseParen(*open_paren, ParseNodeKind::ConditionEnd());
  668. return AddNode(ParseNodeKind::Condition(), *open_paren, start,
  669. /*has_errors=*/!expr || !close_paren);
  670. }
  671. auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
  672. auto start = StartSubtree();
  673. auto if_token = Consume(TokenKind::IfKeyword());
  674. auto cond = ParseParenCondition(TokenKind::IfKeyword());
  675. auto then_case = ParseStatement();
  676. bool else_has_errors = false;
  677. if (ConsumeAndAddLeafNodeIf(TokenKind::ElseKeyword(),
  678. ParseNodeKind::IfStatementElse())) {
  679. else_has_errors = !ParseStatement();
  680. }
  681. return AddNode(ParseNodeKind::IfStatement(), if_token, start,
  682. /*has_errors=*/!cond || !then_case || else_has_errors);
  683. }
  684. auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
  685. auto start = StartSubtree();
  686. auto while_token = Consume(TokenKind::WhileKeyword());
  687. auto cond = ParseParenCondition(TokenKind::WhileKeyword());
  688. auto body = ParseStatement();
  689. return AddNode(ParseNodeKind::WhileStatement(), while_token, start,
  690. /*has_errors=*/!cond || !body);
  691. }
  692. auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
  693. KeywordStatementArgument argument)
  694. -> llvm::Optional<Node> {
  695. auto keyword_kind = tokens.GetKind(*position);
  696. assert(keyword_kind.IsKeyword());
  697. auto start = StartSubtree();
  698. auto keyword = Consume(keyword_kind);
  699. bool arg_error = false;
  700. if ((argument == KeywordStatementArgument::Optional &&
  701. tokens.GetKind(*position) != TokenKind::Semi()) ||
  702. argument == KeywordStatementArgument::Mandatory) {
  703. arg_error = !ParseExpression();
  704. }
  705. auto semi =
  706. ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
  707. if (!semi) {
  708. emitter.EmitError<ExpectedSemiAfter>(*position,
  709. {.preceding = keyword_kind});
  710. // FIXME: Try to skip to a semicolon to recover.
  711. }
  712. return AddNode(kind, keyword, start, /*has_errors=*/!semi || arg_error);
  713. }
  714. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  715. switch (tokens.GetKind(*position)) {
  716. case TokenKind::VarKeyword():
  717. return ParseVariableDeclaration();
  718. case TokenKind::IfKeyword():
  719. return ParseIfStatement();
  720. case TokenKind::WhileKeyword():
  721. return ParseWhileStatement();
  722. case TokenKind::ContinueKeyword():
  723. return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
  724. KeywordStatementArgument::None);
  725. case TokenKind::BreakKeyword():
  726. return ParseKeywordStatement(ParseNodeKind::BreakStatement(),
  727. KeywordStatementArgument::None);
  728. case TokenKind::ReturnKeyword():
  729. return ParseKeywordStatement(ParseNodeKind::ReturnStatement(),
  730. KeywordStatementArgument::Optional);
  731. case TokenKind::OpenCurlyBrace():
  732. return ParseCodeBlock();
  733. default:
  734. // A statement with no introducer token can only be an expression
  735. // statement.
  736. return ParseExpressionStatement();
  737. }
  738. }
  739. } // namespace Carbon