parser_impl.cpp 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "llvm/ADT/Optional.h"
  7. #include "llvm/Support/FormatVariadic.h"
  8. #include "llvm/Support/raw_ostream.h"
  9. #include "toolchain/lexer/token_kind.h"
  10. #include "toolchain/lexer/tokenized_buffer.h"
  11. #include "toolchain/parser/parse_node_kind.h"
  12. #include "toolchain/parser/parse_tree.h"
  13. namespace Carbon {
  14. struct UnexpectedTokenInCodeBlock
  15. : SimpleDiagnostic<UnexpectedTokenInCodeBlock> {
  16. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  17. static constexpr llvm::StringLiteral Message =
  18. "Unexpected token in code block.";
  19. };
  20. struct ExpectedFunctionName : SimpleDiagnostic<ExpectedFunctionName> {
  21. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  22. static constexpr llvm::StringLiteral Message =
  23. "Expected function name after `fn` keyword.";
  24. };
  25. struct ExpectedFunctionParams : SimpleDiagnostic<ExpectedFunctionParams> {
  26. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  27. static constexpr llvm::StringLiteral Message =
  28. "Expected `(` after function name.";
  29. };
  30. struct ExpectedFunctionBodyOrSemi
  31. : SimpleDiagnostic<ExpectedFunctionBodyOrSemi> {
  32. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  33. static constexpr llvm::StringLiteral Message =
  34. "Expected function definition or `;` after function declaration.";
  35. };
  36. struct ExpectedVariableName : SimpleDiagnostic<ExpectedVariableName> {
  37. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  38. static constexpr llvm::StringLiteral Message =
  39. "Expected pattern in `var` declaration.";
  40. };
  41. struct ExpectedParameterName : SimpleDiagnostic<ExpectedParameterName> {
  42. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  43. static constexpr llvm::StringLiteral Message =
  44. "Expected parameter declaration.";
  45. };
  46. struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
  47. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  48. static constexpr llvm::StringLiteral Message =
  49. "Unrecognized declaration introducer.";
  50. };
  51. struct ExpectedExpression : SimpleDiagnostic<ExpectedExpression> {
  52. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  53. static constexpr llvm::StringLiteral Message = "Expected expression.";
  54. };
  55. struct ExpectedParenAfter : SimpleDiagnostic<ExpectedParenAfter> {
  56. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  57. static constexpr const char* Message = "Expected `(` after `{0}`.";
  58. TokenKind introducer;
  59. auto Format() -> std::string {
  60. return llvm::formatv(Message, introducer.GetFixedSpelling()).str();
  61. }
  62. };
  63. struct ExpectedCloseParen : SimpleDiagnostic<ExpectedCloseParen> {
  64. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  65. static constexpr llvm::StringLiteral Message =
  66. "Unexpected tokens before `)`.";
  67. // TODO: Include the location of the matching open paren in the diagnostic.
  68. TokenizedBuffer::Token open_paren;
  69. };
  70. struct ExpectedSemiAfterExpression
  71. : SimpleDiagnostic<ExpectedSemiAfterExpression> {
  72. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  73. static constexpr llvm::StringLiteral Message =
  74. "Expected `;` after expression.";
  75. };
  76. struct ExpectedSemiAfter : SimpleDiagnostic<ExpectedSemiAfter> {
  77. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  78. static constexpr const char* Message = "Expected `;` after `{0}`.";
  79. TokenKind preceding;
  80. auto Format() -> std::string {
  81. return llvm::formatv(Message, preceding.GetFixedSpelling()).str();
  82. }
  83. };
  84. struct ExpectedIdentifierAfterDot
  85. : SimpleDiagnostic<ExpectedIdentifierAfterDot> {
  86. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  87. static constexpr llvm::StringLiteral Message =
  88. "Expected identifier after `.`.";
  89. };
  90. struct UnexpectedTokenAfterListElement
  91. : SimpleDiagnostic<UnexpectedTokenAfterListElement> {
  92. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  93. static constexpr llvm::StringLiteral Message = "Expected `,` or `)`.";
  94. };
  95. struct BinaryOperatorRequiresWhitespace
  96. : SimpleDiagnostic<BinaryOperatorRequiresWhitespace> {
  97. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  98. static constexpr const char* Message =
  99. "Whitespace missing {0} binary operator.";
  100. bool has_leading_space;
  101. bool has_trailing_space;
  102. auto Format() -> std::string {
  103. const char* where = "around";
  104. // clang-format off
  105. if (has_leading_space) {
  106. where = "after";
  107. } else if (has_trailing_space) {
  108. where = "before";
  109. }
  110. // clang-format on
  111. return llvm::formatv(Message, where);
  112. }
  113. };
  114. struct UnaryOperatorHasWhitespace
  115. : SimpleDiagnostic<UnaryOperatorHasWhitespace> {
  116. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  117. static constexpr const char* Message =
  118. "Whitespace is not allowed {0} this unary operator.";
  119. bool prefix;
  120. auto Format() -> std::string {
  121. return llvm::formatv(Message, prefix ? "after" : "before");
  122. }
  123. };
  124. struct UnaryOperatorRequiresWhitespace
  125. : SimpleDiagnostic<UnaryOperatorRequiresWhitespace> {
  126. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  127. static constexpr const char* Message =
  128. "Whitespace is required {0} this unary operator.";
  129. bool prefix;
  130. auto Format() -> std::string {
  131. return llvm::formatv(Message, prefix ? "before" : "after");
  132. }
  133. };
  134. struct OperatorRequiresParentheses
  135. : SimpleDiagnostic<OperatorRequiresParentheses> {
  136. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  137. static constexpr llvm::StringLiteral Message =
  138. "Parentheses are required to disambiguate operator precedence.";
  139. };
  140. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  141. TokenDiagnosticEmitter& emitter)
  142. : tree(tree_arg),
  143. tokens(tokens_arg),
  144. emitter(emitter),
  145. position(tokens.Tokens().begin()),
  146. end(tokens.Tokens().end()) {
  147. assert(std::find_if(position, end,
  148. [&](TokenizedBuffer::Token t) {
  149. return tokens.GetKind(t) == TokenKind::EndOfFile();
  150. }) != end &&
  151. "No EndOfFileToken in token buffer.");
  152. }
  153. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  154. TokenDiagnosticEmitter& emitter) -> ParseTree {
  155. ParseTree tree(tokens);
  156. // We expect to have a 1:1 correspondence between tokens and tree nodes, so
  157. // reserve the space we expect to need here to avoid allocation and copying
  158. // overhead.
  159. tree.node_impls.reserve(tokens.Size());
  160. Parser parser(tree, tokens, emitter);
  161. while (!parser.AtEndOfFile()) {
  162. if (!parser.ParseDeclaration()) {
  163. // We don't have an enclosing parse tree node to mark as erroneous, so
  164. // just mark the tree as a whole.
  165. tree.has_errors = true;
  166. }
  167. }
  168. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position);
  169. assert(tree.Verify() && "Parse tree built but does not verify!");
  170. return tree;
  171. }
  172. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  173. assert(kind != TokenKind::EndOfFile() && "Cannot consume the EOF token!");
  174. assert(NextTokenIs(kind) && "The current token is the wrong kind!");
  175. TokenizedBuffer::Token t = *position;
  176. ++position;
  177. assert(position != end && "Reached end of tokens without finding EOF token.");
  178. return t;
  179. }
  180. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  181. -> llvm::Optional<TokenizedBuffer::Token> {
  182. if (!NextTokenIs(kind)) {
  183. return {};
  184. }
  185. return Consume(kind);
  186. }
  187. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  188. TokenizedBuffer::Token token) -> Node {
  189. Node n(tree.node_impls.size());
  190. tree.node_impls.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  191. return n;
  192. }
  193. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  194. ParseNodeKind n_kind)
  195. -> llvm::Optional<Node> {
  196. auto t = ConsumeIf(t_kind);
  197. if (!t) {
  198. return {};
  199. }
  200. return AddLeafNode(n_kind, *t);
  201. }
  202. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  203. tree.node_impls[n.index].has_error = true;
  204. tree.has_errors = true;
  205. }
  206. // A marker for the start of a node's subtree.
  207. //
  208. // This is used to track the size of the node's subtree. It can be used
  209. // repeatedly if multiple subtrees start at the same position.
  210. struct ParseTree::Parser::SubtreeStart {
  211. int tree_size;
  212. };
  213. auto ParseTree::Parser::GetSubtreeStartPosition() -> SubtreeStart {
  214. return {static_cast<int>(tree.node_impls.size())};
  215. }
  216. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  217. SubtreeStart start, bool has_error) -> Node {
  218. // The size of the subtree is the change in size from when we started this
  219. // subtree to now, but including the node we're about to add.
  220. int tree_stop_size = static_cast<int>(tree.node_impls.size()) + 1;
  221. int subtree_size = tree_stop_size - start.tree_size;
  222. Node n(tree.node_impls.size());
  223. tree.node_impls.push_back(NodeImpl(n_kind, t, subtree_size));
  224. if (has_error) {
  225. MarkNodeError(n);
  226. }
  227. return n;
  228. }
  229. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  230. TokenizedBuffer::Token t = *position;
  231. TokenKind t_kind = tokens.GetKind(t);
  232. if (!t_kind.IsOpeningSymbol()) {
  233. return false;
  234. }
  235. SkipTo(tokens.GetMatchedClosingToken(t));
  236. Consume(t_kind.GetClosingSymbol());
  237. return true;
  238. }
  239. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  240. assert(t >= *position && "Tried to skip backwards.");
  241. position = TokenizedBuffer::TokenIterator(t);
  242. assert(position != end && "Skipped past EOF.");
  243. }
  244. auto ParseTree::Parser::FindNextOf(
  245. std::initializer_list<TokenKind> desired_kinds)
  246. -> llvm::Optional<TokenizedBuffer::Token> {
  247. auto new_position = position;
  248. while (true) {
  249. TokenizedBuffer::Token token = *new_position;
  250. TokenKind kind = tokens.GetKind(token);
  251. if (kind.IsOneOf(desired_kinds)) {
  252. return token;
  253. }
  254. // Step to the next token at the current bracketing level.
  255. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  256. // There are no more tokens at this level.
  257. return llvm::None;
  258. } else if (kind.IsOpeningSymbol()) {
  259. new_position =
  260. TokenizedBuffer::TokenIterator(tokens.GetMatchedClosingToken(token));
  261. } else {
  262. ++new_position;
  263. }
  264. }
  265. }
  266. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root,
  267. SemiHandler on_semi)
  268. -> llvm::Optional<Node> {
  269. if (AtEndOfFile()) {
  270. return llvm::None;
  271. }
  272. TokenizedBuffer::Line root_line = tokens.GetLine(skip_root);
  273. int root_line_indent = tokens.GetIndentColumnNumber(root_line);
  274. // We will keep scanning through tokens on the same line as the root or
  275. // lines with greater indentation than root's line.
  276. auto is_same_line_or_indent_greater_than_root =
  277. [&](TokenizedBuffer::Token t) {
  278. TokenizedBuffer::Line l = tokens.GetLine(t);
  279. if (l == root_line) {
  280. return true;
  281. }
  282. return tokens.GetIndentColumnNumber(l) > root_line_indent;
  283. };
  284. do {
  285. if (NextTokenKind() == TokenKind::CloseCurlyBrace()) {
  286. // Immediately bail out if we hit an unmatched close curly, this will
  287. // pop us up a level of the syntax grouping.
  288. return llvm::None;
  289. }
  290. // We assume that a semicolon is always intended to be the end of the
  291. // current construct.
  292. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  293. return on_semi(*semi);
  294. }
  295. // Skip over any matching group of tokens.
  296. if (SkipMatchingGroup()) {
  297. continue;
  298. }
  299. // Otherwise just step forward one token.
  300. Consume(NextTokenKind());
  301. } while (!AtEndOfFile() &&
  302. is_same_line_or_indent_greater_than_root(*position));
  303. return llvm::None;
  304. }
  305. auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
  306. ParseNodeKind kind)
  307. -> llvm::Optional<Node> {
  308. if (auto close_paren =
  309. ConsumeAndAddLeafNodeIf(TokenKind::CloseParen(), kind)) {
  310. return close_paren;
  311. }
  312. emitter.EmitError<ExpectedCloseParen>(*position, {.open_paren = open_paren});
  313. SkipTo(tokens.GetMatchedClosingToken(open_paren));
  314. AddLeafNode(kind, Consume(TokenKind::CloseParen()));
  315. return llvm::None;
  316. }
  317. template <typename ListElementParser, typename ListCompletionHandler>
  318. auto ParseTree::Parser::ParseParenList(ListElementParser list_element_parser,
  319. ParseNodeKind comma_kind,
  320. ListCompletionHandler list_handler)
  321. -> llvm::Optional<Node> {
  322. // `(` element-list[opt] `)`
  323. //
  324. // element-list ::= element
  325. // ::= element `,` element-list
  326. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  327. bool has_errors = false;
  328. // Parse elements, if any are specified.
  329. if (!NextTokenIs(TokenKind::CloseParen())) {
  330. while (true) {
  331. bool element_error = !list_element_parser();
  332. has_errors |= element_error;
  333. if (!NextTokenIsOneOf({TokenKind::CloseParen(), TokenKind::Comma()})) {
  334. if (!element_error) {
  335. emitter.EmitError<UnexpectedTokenAfterListElement>(*position);
  336. }
  337. has_errors = true;
  338. auto end_of_element =
  339. FindNextOf({TokenKind::Comma(), TokenKind::CloseParen()});
  340. // The lexer guarantees that parentheses are balanced.
  341. assert(end_of_element && "missing matching `)` for `(`");
  342. SkipTo(*end_of_element);
  343. }
  344. if (NextTokenIs(TokenKind::CloseParen())) {
  345. break;
  346. }
  347. AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
  348. }
  349. }
  350. return list_handler(open_paren, Consume(TokenKind::CloseParen()), has_errors);
  351. }
  352. auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
  353. if (NextTokenIs(TokenKind::Identifier()) &&
  354. tokens.GetKind(*(position + 1)) == TokenKind::Colon()) {
  355. // identifier `:` type
  356. auto start = GetSubtreeStartPosition();
  357. AddLeafNode(ParseNodeKind::DeclaredName(),
  358. Consume(TokenKind::Identifier()));
  359. auto colon = Consume(TokenKind::Colon());
  360. auto type = ParseType();
  361. return AddNode(ParseNodeKind::PatternBinding(), colon, start,
  362. /*has_error=*/!type);
  363. }
  364. switch (kind) {
  365. case PatternKind::Parameter:
  366. emitter.EmitError<ExpectedParameterName>(*position);
  367. break;
  368. case PatternKind::Variable:
  369. emitter.EmitError<ExpectedVariableName>(*position);
  370. break;
  371. }
  372. return llvm::None;
  373. }
  374. auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
  375. return ParsePattern(PatternKind::Parameter);
  376. }
  377. auto ParseTree::Parser::ParseFunctionSignature() -> bool {
  378. auto start = GetSubtreeStartPosition();
  379. auto params = ParseParenList(
  380. [&] { return ParseFunctionParameter(); },
  381. ParseNodeKind::ParameterListComma(),
  382. [&](TokenizedBuffer::Token open_paren, TokenizedBuffer::Token close_paren,
  383. bool has_errors) {
  384. AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren);
  385. return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
  386. has_errors);
  387. });
  388. auto start_return_type = GetSubtreeStartPosition();
  389. if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) {
  390. auto return_type = ParseType();
  391. AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type,
  392. /*has_error=*/!return_type);
  393. if (!return_type) {
  394. return false;
  395. }
  396. }
  397. return params.hasValue();
  398. }
  399. auto ParseTree::Parser::ParseCodeBlock() -> Node {
  400. TokenizedBuffer::Token open_curly = Consume(TokenKind::OpenCurlyBrace());
  401. auto start = GetSubtreeStartPosition();
  402. bool has_errors = false;
  403. // Loop over all the different possibly nested elements in the code block.
  404. while (!NextTokenIs(TokenKind::CloseCurlyBrace())) {
  405. if (!ParseStatement()) {
  406. // We detected and diagnosed an error of some kind. We can trivially skip
  407. // to the actual close curly brace from here.
  408. // FIXME: It would be better to skip to the next semicolon, or the next
  409. // token at the start of a line with the same indent as this one.
  410. SkipTo(tokens.GetMatchedClosingToken(open_curly));
  411. has_errors = true;
  412. break;
  413. }
  414. }
  415. // We always reach here having set our position in the token stream to the
  416. // close curly brace.
  417. AddLeafNode(ParseNodeKind::CodeBlockEnd(),
  418. Consume(TokenKind::CloseCurlyBrace()));
  419. return AddNode(ParseNodeKind::CodeBlock(), open_curly, start, has_errors);
  420. }
  421. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  422. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::FnKeyword());
  423. auto start = GetSubtreeStartPosition();
  424. auto add_error_function_node = [&] {
  425. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  426. start, /*has_error=*/true);
  427. };
  428. auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
  429. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  430. };
  431. auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  432. ParseNodeKind::DeclaredName());
  433. if (!name_n) {
  434. emitter.EmitError<ExpectedFunctionName>(*position);
  435. // FIXME: We could change the lexer to allow us to synthesize certain
  436. // kinds of tokens and try to "recover" here, but unclear that this is
  437. // really useful.
  438. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  439. return add_error_function_node();
  440. }
  441. TokenizedBuffer::Token open_paren = *position;
  442. if (tokens.GetKind(open_paren) != TokenKind::OpenParen()) {
  443. emitter.EmitError<ExpectedFunctionParams>(open_paren);
  444. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  445. return add_error_function_node();
  446. }
  447. TokenizedBuffer::Token close_paren =
  448. tokens.GetMatchedClosingToken(open_paren);
  449. if (!ParseFunctionSignature()) {
  450. // Don't try to parse more of the function declaration, but consume a
  451. // declaration ending semicolon if found (without going to a new line).
  452. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  453. return add_error_function_node();
  454. }
  455. // See if we should parse a definition which is represented as a code block.
  456. if (NextTokenIs(TokenKind::OpenCurlyBrace())) {
  457. ParseCodeBlock();
  458. } else if (!ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  459. ParseNodeKind::DeclarationEnd())) {
  460. emitter.EmitError<ExpectedFunctionBodyOrSemi>(*position);
  461. if (tokens.GetLine(*position) == tokens.GetLine(close_paren)) {
  462. // Only need to skip if we've not already found a new line.
  463. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  464. }
  465. return add_error_function_node();
  466. }
  467. // Successfully parsed the function, add that node.
  468. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  469. start);
  470. }
  471. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  472. // `var` pattern [= expression] `;`
  473. TokenizedBuffer::Token var_token = Consume(TokenKind::VarKeyword());
  474. auto start = GetSubtreeStartPosition();
  475. auto pattern = ParsePattern(PatternKind::Variable);
  476. if (!pattern) {
  477. if (auto after_pattern =
  478. FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  479. SkipTo(*after_pattern);
  480. }
  481. }
  482. auto start_init = GetSubtreeStartPosition();
  483. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  484. auto init = ParseExpression();
  485. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  486. /*has_error=*/!init);
  487. }
  488. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  489. ParseNodeKind::DeclarationEnd());
  490. if (!semi) {
  491. emitter.EmitError<ExpectedSemiAfterExpression>(*position);
  492. SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
  493. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  494. });
  495. }
  496. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  497. /*has_error=*/!pattern || !semi);
  498. }
  499. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  500. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  501. Consume(TokenKind::Semi()));
  502. }
  503. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  504. switch (NextTokenKind()) {
  505. case TokenKind::FnKeyword():
  506. return ParseFunctionDeclaration();
  507. case TokenKind::VarKeyword():
  508. return ParseVariableDeclaration();
  509. case TokenKind::Semi():
  510. return ParseEmptyDeclaration();
  511. case TokenKind::EndOfFile():
  512. return llvm::None;
  513. default:
  514. // Errors are handled outside the switch.
  515. break;
  516. }
  517. // We didn't recognize an introducer for a valid declaration.
  518. emitter.EmitError<UnrecognizedDeclaration>(*position);
  519. // Skip forward past any end of a declaration we simply didn't understand so
  520. // that we can find the start of the next declaration or the end of a scope.
  521. if (auto found_semi_n =
  522. SkipPastLikelyEnd(*position, [&](TokenizedBuffer::Token semi) {
  523. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), semi);
  524. })) {
  525. MarkNodeError(*found_semi_n);
  526. return *found_semi_n;
  527. }
  528. // Nothing, not even a semicolon found.
  529. return llvm::None;
  530. }
  531. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  532. // `(` expression `)`
  533. auto start = GetSubtreeStartPosition();
  534. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  535. // TODO: If the next token is a close paren, build an empty tuple literal.
  536. auto expr = ParseExpression();
  537. // TODO: If the next token is a comma, build a tuple literal.
  538. auto close_paren =
  539. ParseCloseParen(open_paren, ParseNodeKind::ParenExpressionEnd());
  540. return AddNode(ParseNodeKind::ParenExpression(), open_paren, start,
  541. /*has_errors=*/!expr || !close_paren);
  542. }
  543. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  544. llvm::Optional<ParseNodeKind> kind;
  545. switch (NextTokenKind()) {
  546. case TokenKind::Identifier():
  547. kind = ParseNodeKind::NameReference();
  548. break;
  549. case TokenKind::IntegerLiteral():
  550. case TokenKind::RealLiteral():
  551. case TokenKind::StringLiteral():
  552. kind = ParseNodeKind::Literal();
  553. break;
  554. case TokenKind::OpenParen():
  555. return ParseParenExpression();
  556. default:
  557. emitter.EmitError<ExpectedExpression>(*position);
  558. return llvm::None;
  559. }
  560. return AddLeafNode(*kind, Consume(NextTokenKind()));
  561. }
  562. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  563. bool has_errors)
  564. -> llvm::Optional<Node> {
  565. // `.` identifier
  566. auto dot = Consume(TokenKind::Period());
  567. auto name = ConsumeIf(TokenKind::Identifier());
  568. if (name) {
  569. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  570. } else {
  571. emitter.EmitError<ExpectedIdentifierAfterDot>(*position);
  572. // If we see a keyword, assume it was intended to be the designated name.
  573. // TODO: Should keywords be valid in designators?
  574. if (NextTokenKind().IsKeyword()) {
  575. Consume(NextTokenKind());
  576. }
  577. has_errors = true;
  578. }
  579. return AddNode(ParseNodeKind::DesignatorExpression(), dot, start, has_errors);
  580. }
  581. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  582. -> llvm::Optional<Node> {
  583. // `(` expression-list[opt] `)`
  584. //
  585. // expression-list ::= expression
  586. // ::= expression `,` expression-list
  587. return ParseParenList(
  588. [&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(),
  589. [&](TokenizedBuffer::Token open_paren, TokenizedBuffer::Token close_paren,
  590. bool has_arg_errors) {
  591. AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren);
  592. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  593. has_errors || has_arg_errors);
  594. });
  595. }
  596. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  597. auto start = GetSubtreeStartPosition();
  598. llvm::Optional<Node> expression = ParsePrimaryExpression();
  599. while (true) {
  600. switch (NextTokenKind()) {
  601. case TokenKind::Period():
  602. expression = ParseDesignatorExpression(start, !expression);
  603. break;
  604. case TokenKind::OpenParen():
  605. expression = ParseCallExpression(start, !expression);
  606. break;
  607. default: {
  608. return expression;
  609. }
  610. }
  611. }
  612. }
  613. // Determines whether the given token is considered to be the start of an
  614. // operand according to the rules for infix operator parsing.
  615. static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
  616. return kind.IsOneOf({TokenKind::OpenParen(), TokenKind::Identifier(),
  617. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  618. TokenKind::StringLiteral()});
  619. }
  620. // Determines whether the given token is considered to be the end of an operand
  621. // according to the rules for infix operator parsing.
  622. static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
  623. return kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  624. TokenKind::CloseSquareBracket(), TokenKind::Identifier(),
  625. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  626. TokenKind::StringLiteral()});
  627. }
  628. // Determines whether the given token could possibly be the start of an operand.
  629. // This is conservatively correct, and will never incorrectly return `false`,
  630. // but can incorrectly return `true`.
  631. static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
  632. return !kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  633. TokenKind::CloseSquareBracket(), TokenKind::Comma(),
  634. TokenKind::Semi(), TokenKind::Colon()});
  635. }
  636. auto ParseTree::Parser::IsLexicallyValidInfixOperator() -> bool {
  637. assert(!AtEndOfFile() && "Expected an operator token.");
  638. bool leading_space = tokens.HasLeadingWhitespace(*position);
  639. bool trailing_space = tokens.HasTrailingWhitespace(*position);
  640. // If there's whitespace on both sides, it's an infix operator.
  641. if (leading_space && trailing_space) {
  642. return true;
  643. }
  644. // If there's whitespace on exactly one side, it's not an infix operator.
  645. if (leading_space || trailing_space) {
  646. return false;
  647. }
  648. // Otherwise, for an infix operator, the preceding token must be any close
  649. // bracket, identifier, or literal and the next token must be an open paren,
  650. // identifier, or literal.
  651. if (position == tokens.Tokens().begin() ||
  652. !IsAssumedEndOfOperand(tokens.GetKind(*(position - 1))) ||
  653. !IsAssumedStartOfOperand(tokens.GetKind(*(position + 1)))) {
  654. return false;
  655. }
  656. return true;
  657. }
  658. auto ParseTree::Parser::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  659. bool is_valid_as_infix = IsLexicallyValidInfixOperator();
  660. if (fixity == OperatorFixity::Infix) {
  661. // Infix operators must satisfy the infix operator rules.
  662. if (!is_valid_as_infix) {
  663. emitter.EmitError<BinaryOperatorRequiresWhitespace>(
  664. *position,
  665. {.has_leading_space = tokens.HasLeadingWhitespace(*position),
  666. .has_trailing_space = tokens.HasTrailingWhitespace(*position)});
  667. }
  668. } else {
  669. bool prefix = fixity == OperatorFixity::Prefix;
  670. // Whitespace is not permitted between a symbolic pre/postfix operator and
  671. // its operand.
  672. if (NextTokenKind().IsSymbol() &&
  673. (prefix ? tokens.HasTrailingWhitespace(*position)
  674. : tokens.HasLeadingWhitespace(*position))) {
  675. emitter.EmitError<UnaryOperatorHasWhitespace>(*position,
  676. {.prefix = prefix});
  677. }
  678. // Pre/postfix operators must not satisfy the infix operator rules.
  679. if (is_valid_as_infix) {
  680. emitter.EmitError<UnaryOperatorRequiresWhitespace>(*position,
  681. {.prefix = prefix});
  682. }
  683. }
  684. }
  685. auto ParseTree::Parser::IsTrailingOperatorInfix() -> bool {
  686. if (AtEndOfFile()) {
  687. return false;
  688. }
  689. // An operator that follows the infix operator rules is parsed as
  690. // infix, unless the next token means that it can't possibly be.
  691. if (IsLexicallyValidInfixOperator() &&
  692. IsPossibleStartOfOperand(tokens.GetKind(*(position + 1)))) {
  693. return true;
  694. }
  695. // A trailing operator with leading whitespace that's not valid as infix is
  696. // not valid at all. If the next token looks like the start of an operand,
  697. // then parse as infix, otherwise as postfix. Either way we'll produce a
  698. // diagnostic later on.
  699. if (tokens.HasLeadingWhitespace(*position) &&
  700. IsAssumedStartOfOperand(tokens.GetKind(*(position + 1)))) {
  701. return true;
  702. }
  703. return false;
  704. }
  705. auto ParseTree::Parser::ParseOperatorExpression(
  706. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  707. auto start = GetSubtreeStartPosition();
  708. llvm::Optional<Node> lhs;
  709. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  710. // Check for a prefix operator.
  711. if (auto operator_precedence = PrecedenceGroup::ForLeading(NextTokenKind());
  712. !operator_precedence) {
  713. lhs = ParsePostfixExpression();
  714. } else {
  715. if (PrecedenceGroup::GetPriority(ambient_precedence,
  716. *operator_precedence) !=
  717. OperatorPriority::RightFirst) {
  718. // The precedence rules don't permit this prefix operator in this
  719. // context. Diagnose this, but carry on and parse it anyway.
  720. emitter.EmitError<OperatorRequiresParentheses>(*position);
  721. } else {
  722. // Check that this operator follows the proper whitespace rules.
  723. DiagnoseOperatorFixity(OperatorFixity::Prefix);
  724. }
  725. auto operator_token = Consume(NextTokenKind());
  726. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  727. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  728. has_errors);
  729. lhs_precedence = *operator_precedence;
  730. }
  731. // Consume a sequence of infix and postfix operators.
  732. while (auto trailing_operator = PrecedenceGroup::ForTrailing(
  733. NextTokenKind(), IsTrailingOperatorInfix())) {
  734. auto [operator_precedence, is_binary] = *trailing_operator;
  735. // FIXME: If this operator is ambiguous with either the ambient precedence
  736. // or the LHS precedence, and there's a variant with a different fixity
  737. // that would work, use that one instead for error recovery.
  738. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  739. OperatorPriority::RightFirst) {
  740. // The precedence rules don't permit this operator in this context. Try
  741. // again in the enclosing expression context.
  742. return lhs;
  743. }
  744. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  745. OperatorPriority::LeftFirst) {
  746. // Either the LHS operator and this operator are ambiguous, or the
  747. // LHS operaor is a unary operator that can't be nested within
  748. // this operator. Either way, parentheses are required.
  749. emitter.EmitError<OperatorRequiresParentheses>(*position);
  750. lhs = llvm::None;
  751. } else {
  752. DiagnoseOperatorFixity(is_binary ? OperatorFixity::Infix
  753. : OperatorFixity::Postfix);
  754. }
  755. auto operator_token = Consume(NextTokenKind());
  756. if (is_binary) {
  757. auto rhs = ParseOperatorExpression(operator_precedence);
  758. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  759. /*has_error=*/!lhs || !rhs);
  760. } else {
  761. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  762. /*has_error=*/!lhs);
  763. }
  764. lhs_precedence = operator_precedence;
  765. }
  766. return lhs;
  767. }
  768. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  769. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  770. }
  771. auto ParseTree::Parser::ParseType() -> llvm::Optional<Node> {
  772. return ParseOperatorExpression(PrecedenceGroup::ForType());
  773. }
  774. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  775. TokenizedBuffer::Token start_token = *position;
  776. auto start = GetSubtreeStartPosition();
  777. bool has_errors = !ParseExpression();
  778. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  779. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  780. has_errors);
  781. }
  782. if (!has_errors) {
  783. emitter.EmitError<ExpectedSemiAfterExpression>(*position);
  784. }
  785. if (auto recovery_node =
  786. SkipPastLikelyEnd(start_token, [&](TokenizedBuffer::Token semi) {
  787. return AddNode(ParseNodeKind::ExpressionStatement(), semi, start,
  788. true);
  789. })) {
  790. return recovery_node;
  791. }
  792. // Found junk not even followed by a `;`.
  793. return llvm::None;
  794. }
  795. auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
  796. -> llvm::Optional<Node> {
  797. // `(` expression `)`
  798. auto start = GetSubtreeStartPosition();
  799. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  800. if (!open_paren) {
  801. emitter.EmitError<ExpectedParenAfter>(*position,
  802. {.introducer = introducer});
  803. }
  804. auto expr = ParseExpression();
  805. if (!open_paren) {
  806. // Don't expect a matching closing paren if there wasn't an opening paren.
  807. return llvm::None;
  808. }
  809. auto close_paren =
  810. ParseCloseParen(*open_paren, ParseNodeKind::ConditionEnd());
  811. return AddNode(ParseNodeKind::Condition(), *open_paren, start,
  812. /*has_errors=*/!expr || !close_paren);
  813. }
  814. auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
  815. auto start = GetSubtreeStartPosition();
  816. auto if_token = Consume(TokenKind::IfKeyword());
  817. auto cond = ParseParenCondition(TokenKind::IfKeyword());
  818. auto then_case = ParseStatement();
  819. bool else_has_errors = false;
  820. if (ConsumeAndAddLeafNodeIf(TokenKind::ElseKeyword(),
  821. ParseNodeKind::IfStatementElse())) {
  822. else_has_errors = !ParseStatement();
  823. }
  824. return AddNode(ParseNodeKind::IfStatement(), if_token, start,
  825. /*has_errors=*/!cond || !then_case || else_has_errors);
  826. }
  827. auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
  828. auto start = GetSubtreeStartPosition();
  829. auto while_token = Consume(TokenKind::WhileKeyword());
  830. auto cond = ParseParenCondition(TokenKind::WhileKeyword());
  831. auto body = ParseStatement();
  832. return AddNode(ParseNodeKind::WhileStatement(), while_token, start,
  833. /*has_errors=*/!cond || !body);
  834. }
  835. auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
  836. KeywordStatementArgument argument)
  837. -> llvm::Optional<Node> {
  838. auto keyword_kind = NextTokenKind();
  839. assert(keyword_kind.IsKeyword());
  840. auto start = GetSubtreeStartPosition();
  841. auto keyword = Consume(keyword_kind);
  842. bool arg_error = false;
  843. if ((argument == KeywordStatementArgument::Optional &&
  844. NextTokenKind() != TokenKind::Semi()) ||
  845. argument == KeywordStatementArgument::Mandatory) {
  846. arg_error = !ParseExpression();
  847. }
  848. auto semi =
  849. ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
  850. if (!semi) {
  851. emitter.EmitError<ExpectedSemiAfter>(*position,
  852. {.preceding = keyword_kind});
  853. // FIXME: Try to skip to a semicolon to recover.
  854. }
  855. return AddNode(kind, keyword, start, /*has_errors=*/!semi || arg_error);
  856. }
  857. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  858. switch (NextTokenKind()) {
  859. case TokenKind::VarKeyword():
  860. return ParseVariableDeclaration();
  861. case TokenKind::IfKeyword():
  862. return ParseIfStatement();
  863. case TokenKind::WhileKeyword():
  864. return ParseWhileStatement();
  865. case TokenKind::ContinueKeyword():
  866. return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
  867. KeywordStatementArgument::None);
  868. case TokenKind::BreakKeyword():
  869. return ParseKeywordStatement(ParseNodeKind::BreakStatement(),
  870. KeywordStatementArgument::None);
  871. case TokenKind::ReturnKeyword():
  872. return ParseKeywordStatement(ParseNodeKind::ReturnStatement(),
  873. KeywordStatementArgument::Optional);
  874. case TokenKind::OpenCurlyBrace():
  875. return ParseCodeBlock();
  876. default:
  877. // A statement with no introducer token can only be an expression
  878. // statement.
  879. return ParseExpressionStatement();
  880. }
  881. }
  882. } // namespace Carbon