parser_impl.cpp 38 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser_impl.h"
  5. #include <cstdlib>
  6. #include "llvm/ADT/Optional.h"
  7. #include "llvm/Support/FormatVariadic.h"
  8. #include "llvm/Support/raw_ostream.h"
  9. #include "toolchain/lexer/token_kind.h"
  10. #include "toolchain/lexer/tokenized_buffer.h"
  11. #include "toolchain/parser/parse_node_kind.h"
  12. #include "toolchain/parser/parse_tree.h"
  13. namespace Carbon {
  14. struct UnexpectedTokenInCodeBlock
  15. : SimpleDiagnostic<UnexpectedTokenInCodeBlock> {
  16. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  17. static constexpr llvm::StringLiteral Message =
  18. "Unexpected token in code block.";
  19. };
  20. struct ExpectedFunctionName : SimpleDiagnostic<ExpectedFunctionName> {
  21. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  22. static constexpr llvm::StringLiteral Message =
  23. "Expected function name after `fn` keyword.";
  24. };
  25. struct ExpectedFunctionParams : SimpleDiagnostic<ExpectedFunctionParams> {
  26. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  27. static constexpr llvm::StringLiteral Message =
  28. "Expected `(` after function name.";
  29. };
  30. struct ExpectedFunctionBodyOrSemi
  31. : SimpleDiagnostic<ExpectedFunctionBodyOrSemi> {
  32. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  33. static constexpr llvm::StringLiteral Message =
  34. "Expected function definition or `;` after function declaration.";
  35. };
  36. struct ExpectedVariableName : SimpleDiagnostic<ExpectedVariableName> {
  37. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  38. static constexpr llvm::StringLiteral Message =
  39. "Expected pattern in `var` declaration.";
  40. };
  41. struct ExpectedParameterName : SimpleDiagnostic<ExpectedParameterName> {
  42. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  43. static constexpr llvm::StringLiteral Message =
  44. "Expected parameter declaration.";
  45. };
  46. struct UnrecognizedDeclaration : SimpleDiagnostic<UnrecognizedDeclaration> {
  47. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  48. static constexpr llvm::StringLiteral Message =
  49. "Unrecognized declaration introducer.";
  50. };
  51. struct ExpectedCodeBlock : SimpleDiagnostic<ExpectedCodeBlock> {
  52. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  53. static constexpr llvm::StringLiteral Message = "Expected braced code block.";
  54. };
  55. struct ExpectedExpression : SimpleDiagnostic<ExpectedExpression> {
  56. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  57. static constexpr llvm::StringLiteral Message = "Expected expression.";
  58. };
  59. struct ExpectedParenAfter : SimpleDiagnostic<ExpectedParenAfter> {
  60. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  61. static constexpr const char* Message = "Expected `(` after `{0}`.";
  62. TokenKind introducer;
  63. auto Format() -> std::string {
  64. return llvm::formatv(Message, introducer.GetFixedSpelling()).str();
  65. }
  66. };
  67. struct ExpectedCloseParen : SimpleDiagnostic<ExpectedCloseParen> {
  68. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  69. static constexpr llvm::StringLiteral Message =
  70. "Unexpected tokens before `)`.";
  71. // TODO: Include the location of the matching open paren in the diagnostic.
  72. TokenizedBuffer::Token open_paren;
  73. };
  74. struct ExpectedSemiAfterExpression
  75. : SimpleDiagnostic<ExpectedSemiAfterExpression> {
  76. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  77. static constexpr llvm::StringLiteral Message =
  78. "Expected `;` after expression.";
  79. };
  80. struct ExpectedSemiAfter : SimpleDiagnostic<ExpectedSemiAfter> {
  81. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  82. static constexpr const char* Message = "Expected `;` after `{0}`.";
  83. TokenKind preceding;
  84. auto Format() -> std::string {
  85. return llvm::formatv(Message, preceding.GetFixedSpelling()).str();
  86. }
  87. };
  88. struct ExpectedIdentifierAfterDot
  89. : SimpleDiagnostic<ExpectedIdentifierAfterDot> {
  90. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  91. static constexpr llvm::StringLiteral Message =
  92. "Expected identifier after `.`.";
  93. };
  94. struct UnexpectedTokenAfterListElement
  95. : SimpleDiagnostic<UnexpectedTokenAfterListElement> {
  96. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  97. static constexpr llvm::StringLiteral Message = "Expected `,` or `)`.";
  98. };
  99. struct BinaryOperatorRequiresWhitespace
  100. : SimpleDiagnostic<BinaryOperatorRequiresWhitespace> {
  101. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  102. static constexpr const char* Message =
  103. "Whitespace missing {0} binary operator.";
  104. bool has_leading_space;
  105. bool has_trailing_space;
  106. auto Format() -> std::string {
  107. const char* where = "around";
  108. // clang-format off
  109. if (has_leading_space) {
  110. where = "after";
  111. } else if (has_trailing_space) {
  112. where = "before";
  113. }
  114. // clang-format on
  115. return llvm::formatv(Message, where);
  116. }
  117. };
  118. struct UnaryOperatorHasWhitespace
  119. : SimpleDiagnostic<UnaryOperatorHasWhitespace> {
  120. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  121. static constexpr const char* Message =
  122. "Whitespace is not allowed {0} this unary operator.";
  123. bool prefix;
  124. auto Format() -> std::string {
  125. return llvm::formatv(Message, prefix ? "after" : "before");
  126. }
  127. };
  128. struct UnaryOperatorRequiresWhitespace
  129. : SimpleDiagnostic<UnaryOperatorRequiresWhitespace> {
  130. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  131. static constexpr const char* Message =
  132. "Whitespace is required {0} this unary operator.";
  133. bool prefix;
  134. auto Format() -> std::string {
  135. return llvm::formatv(Message, prefix ? "before" : "after");
  136. }
  137. };
  138. struct OperatorRequiresParentheses
  139. : SimpleDiagnostic<OperatorRequiresParentheses> {
  140. static constexpr llvm::StringLiteral ShortName = "syntax-error";
  141. static constexpr llvm::StringLiteral Message =
  142. "Parentheses are required to disambiguate operator precedence.";
  143. };
  144. ParseTree::Parser::Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  145. TokenDiagnosticEmitter& emitter)
  146. : tree(tree_arg),
  147. tokens(tokens_arg),
  148. emitter(emitter),
  149. position(tokens.Tokens().begin()),
  150. end(tokens.Tokens().end()) {
  151. assert(std::find_if(position, end,
  152. [&](TokenizedBuffer::Token t) {
  153. return tokens.GetKind(t) == TokenKind::EndOfFile();
  154. }) != end &&
  155. "No EndOfFileToken in token buffer.");
  156. }
  157. auto ParseTree::Parser::Parse(TokenizedBuffer& tokens,
  158. TokenDiagnosticEmitter& emitter) -> ParseTree {
  159. ParseTree tree(tokens);
  160. // We expect to have a 1:1 correspondence between tokens and tree nodes, so
  161. // reserve the space we expect to need here to avoid allocation and copying
  162. // overhead.
  163. tree.node_impls.reserve(tokens.Size());
  164. Parser parser(tree, tokens, emitter);
  165. while (!parser.AtEndOfFile()) {
  166. if (!parser.ParseDeclaration()) {
  167. // We don't have an enclosing parse tree node to mark as erroneous, so
  168. // just mark the tree as a whole.
  169. tree.has_errors = true;
  170. }
  171. }
  172. parser.AddLeafNode(ParseNodeKind::FileEnd(), *parser.position);
  173. assert(tree.Verify() && "Parse tree built but does not verify!");
  174. return tree;
  175. }
  176. auto ParseTree::Parser::Consume(TokenKind kind) -> TokenizedBuffer::Token {
  177. assert(kind != TokenKind::EndOfFile() && "Cannot consume the EOF token!");
  178. assert(NextTokenIs(kind) && "The current token is the wrong kind!");
  179. TokenizedBuffer::Token t = *position;
  180. ++position;
  181. assert(position != end && "Reached end of tokens without finding EOF token.");
  182. return t;
  183. }
  184. auto ParseTree::Parser::ConsumeIf(TokenKind kind)
  185. -> llvm::Optional<TokenizedBuffer::Token> {
  186. if (!NextTokenIs(kind)) {
  187. return {};
  188. }
  189. return Consume(kind);
  190. }
  191. auto ParseTree::Parser::AddLeafNode(ParseNodeKind kind,
  192. TokenizedBuffer::Token token) -> Node {
  193. Node n(tree.node_impls.size());
  194. tree.node_impls.push_back(NodeImpl(kind, token, /*subtree_size_arg=*/1));
  195. return n;
  196. }
  197. auto ParseTree::Parser::ConsumeAndAddLeafNodeIf(TokenKind t_kind,
  198. ParseNodeKind n_kind)
  199. -> llvm::Optional<Node> {
  200. auto t = ConsumeIf(t_kind);
  201. if (!t) {
  202. return {};
  203. }
  204. return AddLeafNode(n_kind, *t);
  205. }
  206. auto ParseTree::Parser::MarkNodeError(Node n) -> void {
  207. tree.node_impls[n.index].has_error = true;
  208. tree.has_errors = true;
  209. }
  210. // A marker for the start of a node's subtree.
  211. //
  212. // This is used to track the size of the node's subtree. It can be used
  213. // repeatedly if multiple subtrees start at the same position.
  214. struct ParseTree::Parser::SubtreeStart {
  215. int tree_size;
  216. };
  217. auto ParseTree::Parser::GetSubtreeStartPosition() -> SubtreeStart {
  218. return {static_cast<int>(tree.node_impls.size())};
  219. }
  220. auto ParseTree::Parser::AddNode(ParseNodeKind n_kind, TokenizedBuffer::Token t,
  221. SubtreeStart start, bool has_error) -> Node {
  222. // The size of the subtree is the change in size from when we started this
  223. // subtree to now, but including the node we're about to add.
  224. int tree_stop_size = static_cast<int>(tree.node_impls.size()) + 1;
  225. int subtree_size = tree_stop_size - start.tree_size;
  226. Node n(tree.node_impls.size());
  227. tree.node_impls.push_back(NodeImpl(n_kind, t, subtree_size));
  228. if (has_error) {
  229. MarkNodeError(n);
  230. }
  231. return n;
  232. }
  233. auto ParseTree::Parser::SkipMatchingGroup() -> bool {
  234. TokenizedBuffer::Token t = *position;
  235. TokenKind t_kind = tokens.GetKind(t);
  236. if (!t_kind.IsOpeningSymbol()) {
  237. return false;
  238. }
  239. SkipTo(tokens.GetMatchedClosingToken(t));
  240. Consume(t_kind.GetClosingSymbol());
  241. return true;
  242. }
  243. auto ParseTree::Parser::SkipTo(TokenizedBuffer::Token t) -> void {
  244. assert(t >= *position && "Tried to skip backwards.");
  245. position = TokenizedBuffer::TokenIterator(t);
  246. assert(position != end && "Skipped past EOF.");
  247. }
  248. auto ParseTree::Parser::FindNextOf(
  249. std::initializer_list<TokenKind> desired_kinds)
  250. -> llvm::Optional<TokenizedBuffer::Token> {
  251. auto new_position = position;
  252. while (true) {
  253. TokenizedBuffer::Token token = *new_position;
  254. TokenKind kind = tokens.GetKind(token);
  255. if (kind.IsOneOf(desired_kinds)) {
  256. return token;
  257. }
  258. // Step to the next token at the current bracketing level.
  259. if (kind.IsClosingSymbol() || kind == TokenKind::EndOfFile()) {
  260. // There are no more tokens at this level.
  261. return llvm::None;
  262. } else if (kind.IsOpeningSymbol()) {
  263. new_position =
  264. TokenizedBuffer::TokenIterator(tokens.GetMatchedClosingToken(token));
  265. } else {
  266. ++new_position;
  267. }
  268. }
  269. }
  270. auto ParseTree::Parser::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root,
  271. SemiHandler on_semi)
  272. -> llvm::Optional<Node> {
  273. if (AtEndOfFile()) {
  274. return llvm::None;
  275. }
  276. TokenizedBuffer::Line root_line = tokens.GetLine(skip_root);
  277. int root_line_indent = tokens.GetIndentColumnNumber(root_line);
  278. // We will keep scanning through tokens on the same line as the root or
  279. // lines with greater indentation than root's line.
  280. auto is_same_line_or_indent_greater_than_root =
  281. [&](TokenizedBuffer::Token t) {
  282. TokenizedBuffer::Line l = tokens.GetLine(t);
  283. if (l == root_line) {
  284. return true;
  285. }
  286. return tokens.GetIndentColumnNumber(l) > root_line_indent;
  287. };
  288. do {
  289. if (NextTokenKind() == TokenKind::CloseCurlyBrace()) {
  290. // Immediately bail out if we hit an unmatched close curly, this will
  291. // pop us up a level of the syntax grouping.
  292. return llvm::None;
  293. }
  294. // We assume that a semicolon is always intended to be the end of the
  295. // current construct.
  296. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  297. return on_semi(*semi);
  298. }
  299. // Skip over any matching group of tokens.
  300. if (SkipMatchingGroup()) {
  301. continue;
  302. }
  303. // Otherwise just step forward one token.
  304. Consume(NextTokenKind());
  305. } while (!AtEndOfFile() &&
  306. is_same_line_or_indent_greater_than_root(*position));
  307. return llvm::None;
  308. }
  309. auto ParseTree::Parser::ParseCloseParen(TokenizedBuffer::Token open_paren,
  310. ParseNodeKind kind)
  311. -> llvm::Optional<Node> {
  312. if (auto close_paren =
  313. ConsumeAndAddLeafNodeIf(TokenKind::CloseParen(), kind)) {
  314. return close_paren;
  315. }
  316. emitter.EmitError<ExpectedCloseParen>(*position, {.open_paren = open_paren});
  317. SkipTo(tokens.GetMatchedClosingToken(open_paren));
  318. AddLeafNode(kind, Consume(TokenKind::CloseParen()));
  319. return llvm::None;
  320. }
  321. template <typename ListElementParser, typename ListCompletionHandler>
  322. auto ParseTree::Parser::ParseParenList(ListElementParser list_element_parser,
  323. ParseNodeKind comma_kind,
  324. ListCompletionHandler list_handler,
  325. bool allow_trailing_comma)
  326. -> llvm::Optional<Node> {
  327. // `(` element-list[opt] `)`
  328. //
  329. // element-list ::= element
  330. // ::= element `,` element-list
  331. TokenizedBuffer::Token open_paren = Consume(TokenKind::OpenParen());
  332. bool has_errors = false;
  333. bool any_commas = false;
  334. int64_t num_elements = 0;
  335. // Parse elements, if any are specified.
  336. if (!NextTokenIs(TokenKind::CloseParen())) {
  337. while (true) {
  338. bool element_error = !list_element_parser();
  339. has_errors |= element_error;
  340. ++num_elements;
  341. if (!NextTokenIsOneOf({TokenKind::CloseParen(), TokenKind::Comma()})) {
  342. if (!element_error) {
  343. emitter.EmitError<UnexpectedTokenAfterListElement>(*position);
  344. }
  345. has_errors = true;
  346. auto end_of_element =
  347. FindNextOf({TokenKind::Comma(), TokenKind::CloseParen()});
  348. // The lexer guarantees that parentheses are balanced.
  349. assert(end_of_element && "missing matching `)` for `(`");
  350. SkipTo(*end_of_element);
  351. }
  352. if (NextTokenIs(TokenKind::CloseParen())) {
  353. break;
  354. }
  355. AddLeafNode(comma_kind, Consume(TokenKind::Comma()));
  356. any_commas = true;
  357. if (allow_trailing_comma && NextTokenIs(TokenKind::CloseParen())) {
  358. break;
  359. }
  360. }
  361. }
  362. bool is_single_item = num_elements == 1 && !any_commas;
  363. return list_handler(open_paren, is_single_item,
  364. Consume(TokenKind::CloseParen()), has_errors);
  365. }
  366. auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
  367. if (NextTokenIs(TokenKind::Identifier()) &&
  368. tokens.GetKind(*(position + 1)) == TokenKind::Colon()) {
  369. // identifier `:` type
  370. auto start = GetSubtreeStartPosition();
  371. AddLeafNode(ParseNodeKind::DeclaredName(),
  372. Consume(TokenKind::Identifier()));
  373. auto colon = Consume(TokenKind::Colon());
  374. auto type = ParseType();
  375. return AddNode(ParseNodeKind::PatternBinding(), colon, start,
  376. /*has_error=*/!type);
  377. }
  378. switch (kind) {
  379. case PatternKind::Parameter:
  380. emitter.EmitError<ExpectedParameterName>(*position);
  381. break;
  382. case PatternKind::Variable:
  383. emitter.EmitError<ExpectedVariableName>(*position);
  384. break;
  385. }
  386. return llvm::None;
  387. }
  388. auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
  389. return ParsePattern(PatternKind::Parameter);
  390. }
  391. auto ParseTree::Parser::ParseFunctionSignature() -> bool {
  392. auto start = GetSubtreeStartPosition();
  393. auto params = ParseParenList(
  394. [&] { return ParseFunctionParameter(); },
  395. ParseNodeKind::ParameterListComma(),
  396. [&](TokenizedBuffer::Token open_paren, bool is_single_item,
  397. TokenizedBuffer::Token close_paren, bool has_errors) {
  398. AddLeafNode(ParseNodeKind::ParameterListEnd(), close_paren);
  399. return AddNode(ParseNodeKind::ParameterList(), open_paren, start,
  400. has_errors);
  401. });
  402. auto start_return_type = GetSubtreeStartPosition();
  403. if (auto arrow = ConsumeIf(TokenKind::MinusGreater())) {
  404. auto return_type = ParseType();
  405. AddNode(ParseNodeKind::ReturnType(), *arrow, start_return_type,
  406. /*has_error=*/!return_type);
  407. if (!return_type) {
  408. return false;
  409. }
  410. }
  411. return params.hasValue();
  412. }
  413. auto ParseTree::Parser::ParseCodeBlock() -> llvm::Optional<Node> {
  414. llvm::Optional<TokenizedBuffer::Token> maybe_open_curly =
  415. ConsumeIf(TokenKind::OpenCurlyBrace());
  416. if (!maybe_open_curly) {
  417. // Recover by parsing a single statement.
  418. emitter.EmitError<ExpectedCodeBlock>(*position);
  419. return ParseStatement();
  420. }
  421. TokenizedBuffer::Token open_curly = *maybe_open_curly;
  422. auto start = GetSubtreeStartPosition();
  423. bool has_errors = false;
  424. // Loop over all the different possibly nested elements in the code block.
  425. while (!NextTokenIs(TokenKind::CloseCurlyBrace())) {
  426. if (!ParseStatement()) {
  427. // We detected and diagnosed an error of some kind. We can trivially skip
  428. // to the actual close curly brace from here.
  429. // FIXME: It would be better to skip to the next semicolon, or the next
  430. // token at the start of a line with the same indent as this one.
  431. SkipTo(tokens.GetMatchedClosingToken(open_curly));
  432. has_errors = true;
  433. break;
  434. }
  435. }
  436. // We always reach here having set our position in the token stream to the
  437. // close curly brace.
  438. AddLeafNode(ParseNodeKind::CodeBlockEnd(),
  439. Consume(TokenKind::CloseCurlyBrace()));
  440. return AddNode(ParseNodeKind::CodeBlock(), open_curly, start, has_errors);
  441. }
  442. auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
  443. TokenizedBuffer::Token function_intro_token = Consume(TokenKind::FnKeyword());
  444. auto start = GetSubtreeStartPosition();
  445. auto add_error_function_node = [&] {
  446. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  447. start, /*has_error=*/true);
  448. };
  449. auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
  450. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  451. };
  452. auto name_n = ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  453. ParseNodeKind::DeclaredName());
  454. if (!name_n) {
  455. emitter.EmitError<ExpectedFunctionName>(*position);
  456. // FIXME: We could change the lexer to allow us to synthesize certain
  457. // kinds of tokens and try to "recover" here, but unclear that this is
  458. // really useful.
  459. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  460. return add_error_function_node();
  461. }
  462. TokenizedBuffer::Token open_paren = *position;
  463. if (tokens.GetKind(open_paren) != TokenKind::OpenParen()) {
  464. emitter.EmitError<ExpectedFunctionParams>(open_paren);
  465. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  466. return add_error_function_node();
  467. }
  468. TokenizedBuffer::Token close_paren =
  469. tokens.GetMatchedClosingToken(open_paren);
  470. if (!ParseFunctionSignature()) {
  471. // Don't try to parse more of the function declaration, but consume a
  472. // declaration ending semicolon if found (without going to a new line).
  473. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  474. return add_error_function_node();
  475. }
  476. // See if we should parse a definition which is represented as a code block.
  477. if (NextTokenIs(TokenKind::OpenCurlyBrace())) {
  478. if (!ParseCodeBlock()) {
  479. return add_error_function_node();
  480. }
  481. } else if (!ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  482. ParseNodeKind::DeclarationEnd())) {
  483. emitter.EmitError<ExpectedFunctionBodyOrSemi>(*position);
  484. if (tokens.GetLine(*position) == tokens.GetLine(close_paren)) {
  485. // Only need to skip if we've not already found a new line.
  486. SkipPastLikelyEnd(function_intro_token, handle_semi_in_error_recovery);
  487. }
  488. return add_error_function_node();
  489. }
  490. // Successfully parsed the function, add that node.
  491. return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
  492. start);
  493. }
  494. auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
  495. // `var` pattern [= expression] `;`
  496. TokenizedBuffer::Token var_token = Consume(TokenKind::VarKeyword());
  497. auto start = GetSubtreeStartPosition();
  498. auto pattern = ParsePattern(PatternKind::Variable);
  499. if (!pattern) {
  500. if (auto after_pattern =
  501. FindNextOf({TokenKind::Equal(), TokenKind::Semi()})) {
  502. SkipTo(*after_pattern);
  503. }
  504. }
  505. auto start_init = GetSubtreeStartPosition();
  506. if (auto equal_token = ConsumeIf(TokenKind::Equal())) {
  507. auto init = ParseExpression();
  508. AddNode(ParseNodeKind::VariableInitializer(), *equal_token, start_init,
  509. /*has_error=*/!init);
  510. }
  511. auto semi = ConsumeAndAddLeafNodeIf(TokenKind::Semi(),
  512. ParseNodeKind::DeclarationEnd());
  513. if (!semi) {
  514. emitter.EmitError<ExpectedSemiAfterExpression>(*position);
  515. SkipPastLikelyEnd(var_token, [&](TokenizedBuffer::Token semi) {
  516. return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
  517. });
  518. }
  519. return AddNode(ParseNodeKind::VariableDeclaration(), var_token, start,
  520. /*has_error=*/!pattern || !semi);
  521. }
  522. auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
  523. return AddLeafNode(ParseNodeKind::EmptyDeclaration(),
  524. Consume(TokenKind::Semi()));
  525. }
  526. auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
  527. switch (NextTokenKind()) {
  528. case TokenKind::FnKeyword():
  529. return ParseFunctionDeclaration();
  530. case TokenKind::VarKeyword():
  531. return ParseVariableDeclaration();
  532. case TokenKind::Semi():
  533. return ParseEmptyDeclaration();
  534. case TokenKind::EndOfFile():
  535. return llvm::None;
  536. default:
  537. // Errors are handled outside the switch.
  538. break;
  539. }
  540. // We didn't recognize an introducer for a valid declaration.
  541. emitter.EmitError<UnrecognizedDeclaration>(*position);
  542. // Skip forward past any end of a declaration we simply didn't understand so
  543. // that we can find the start of the next declaration or the end of a scope.
  544. if (auto found_semi_n =
  545. SkipPastLikelyEnd(*position, [&](TokenizedBuffer::Token semi) {
  546. return AddLeafNode(ParseNodeKind::EmptyDeclaration(), semi);
  547. })) {
  548. MarkNodeError(*found_semi_n);
  549. return *found_semi_n;
  550. }
  551. // Nothing, not even a semicolon found.
  552. return llvm::None;
  553. }
  554. auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
  555. // parenthesized-expression ::= `(` expression `)`
  556. // tuple-literal ::= `(` `)`
  557. // ::= `(` expression `,` [expression-list [`,`]] `)`
  558. //
  559. // Parse the union of these, `(` [expression-list [`,`]] `)`, and work out
  560. // whether it's a tuple or a parenthesized expression afterwards.
  561. auto start = GetSubtreeStartPosition();
  562. return ParseParenList(
  563. [&] { return ParseExpression(); }, ParseNodeKind::TupleLiteralComma(),
  564. [&](TokenizedBuffer::Token open_paren, bool is_single_item,
  565. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  566. AddLeafNode(is_single_item ? ParseNodeKind::ParenExpressionEnd()
  567. : ParseNodeKind::TupleLiteralEnd(),
  568. close_paren);
  569. return AddNode(is_single_item ? ParseNodeKind::ParenExpression()
  570. : ParseNodeKind::TupleLiteral(),
  571. open_paren, start, has_arg_errors);
  572. },
  573. /*allow_trailing_comma=*/true);
  574. }
  575. auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
  576. llvm::Optional<ParseNodeKind> kind;
  577. switch (NextTokenKind()) {
  578. case TokenKind::Identifier():
  579. kind = ParseNodeKind::NameReference();
  580. break;
  581. case TokenKind::IntegerLiteral():
  582. case TokenKind::RealLiteral():
  583. case TokenKind::StringLiteral():
  584. case TokenKind::IntegerTypeLiteral():
  585. case TokenKind::UnsignedIntegerTypeLiteral():
  586. case TokenKind::FloatingPointTypeLiteral():
  587. kind = ParseNodeKind::Literal();
  588. break;
  589. case TokenKind::OpenParen():
  590. return ParseParenExpression();
  591. default:
  592. emitter.EmitError<ExpectedExpression>(*position);
  593. return llvm::None;
  594. }
  595. return AddLeafNode(*kind, Consume(NextTokenKind()));
  596. }
  597. auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
  598. bool has_errors)
  599. -> llvm::Optional<Node> {
  600. // `.` identifier
  601. auto dot = Consume(TokenKind::Period());
  602. auto name = ConsumeIf(TokenKind::Identifier());
  603. if (name) {
  604. AddLeafNode(ParseNodeKind::DesignatedName(), *name);
  605. } else {
  606. emitter.EmitError<ExpectedIdentifierAfterDot>(*position);
  607. // If we see a keyword, assume it was intended to be the designated name.
  608. // TODO: Should keywords be valid in designators?
  609. if (NextTokenKind().IsKeyword()) {
  610. Consume(NextTokenKind());
  611. }
  612. has_errors = true;
  613. }
  614. return AddNode(ParseNodeKind::DesignatorExpression(), dot, start, has_errors);
  615. }
  616. auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
  617. -> llvm::Optional<Node> {
  618. // `(` expression-list[opt] `)`
  619. //
  620. // expression-list ::= expression
  621. // ::= expression `,` expression-list
  622. return ParseParenList(
  623. [&] { return ParseExpression(); }, ParseNodeKind::CallExpressionComma(),
  624. [&](TokenizedBuffer::Token open_paren, bool is_single_item,
  625. TokenizedBuffer::Token close_paren, bool has_arg_errors) {
  626. AddLeafNode(ParseNodeKind::CallExpressionEnd(), close_paren);
  627. return AddNode(ParseNodeKind::CallExpression(), open_paren, start,
  628. has_errors || has_arg_errors);
  629. });
  630. }
  631. auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
  632. auto start = GetSubtreeStartPosition();
  633. llvm::Optional<Node> expression = ParsePrimaryExpression();
  634. while (true) {
  635. switch (NextTokenKind()) {
  636. case TokenKind::Period():
  637. expression = ParseDesignatorExpression(start, !expression);
  638. break;
  639. case TokenKind::OpenParen():
  640. expression = ParseCallExpression(start, !expression);
  641. break;
  642. default: {
  643. return expression;
  644. }
  645. }
  646. }
  647. }
  648. // Determines whether the given token is considered to be the start of an
  649. // operand according to the rules for infix operator parsing.
  650. static auto IsAssumedStartOfOperand(TokenKind kind) -> bool {
  651. return kind.IsOneOf({TokenKind::OpenParen(), TokenKind::Identifier(),
  652. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  653. TokenKind::StringLiteral()});
  654. }
  655. // Determines whether the given token is considered to be the end of an operand
  656. // according to the rules for infix operator parsing.
  657. static auto IsAssumedEndOfOperand(TokenKind kind) -> bool {
  658. return kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  659. TokenKind::CloseSquareBracket(), TokenKind::Identifier(),
  660. TokenKind::IntegerLiteral(), TokenKind::RealLiteral(),
  661. TokenKind::StringLiteral()});
  662. }
  663. // Determines whether the given token could possibly be the start of an operand.
  664. // This is conservatively correct, and will never incorrectly return `false`,
  665. // but can incorrectly return `true`.
  666. static auto IsPossibleStartOfOperand(TokenKind kind) -> bool {
  667. return !kind.IsOneOf({TokenKind::CloseParen(), TokenKind::CloseCurlyBrace(),
  668. TokenKind::CloseSquareBracket(), TokenKind::Comma(),
  669. TokenKind::Semi(), TokenKind::Colon()});
  670. }
  671. auto ParseTree::Parser::IsLexicallyValidInfixOperator() -> bool {
  672. assert(!AtEndOfFile() && "Expected an operator token.");
  673. bool leading_space = tokens.HasLeadingWhitespace(*position);
  674. bool trailing_space = tokens.HasTrailingWhitespace(*position);
  675. // If there's whitespace on both sides, it's an infix operator.
  676. if (leading_space && trailing_space) {
  677. return true;
  678. }
  679. // If there's whitespace on exactly one side, it's not an infix operator.
  680. if (leading_space || trailing_space) {
  681. return false;
  682. }
  683. // Otherwise, for an infix operator, the preceding token must be any close
  684. // bracket, identifier, or literal and the next token must be an open paren,
  685. // identifier, or literal.
  686. if (position == tokens.Tokens().begin() ||
  687. !IsAssumedEndOfOperand(tokens.GetKind(*(position - 1))) ||
  688. !IsAssumedStartOfOperand(tokens.GetKind(*(position + 1)))) {
  689. return false;
  690. }
  691. return true;
  692. }
  693. auto ParseTree::Parser::DiagnoseOperatorFixity(OperatorFixity fixity) -> void {
  694. bool is_valid_as_infix = IsLexicallyValidInfixOperator();
  695. if (fixity == OperatorFixity::Infix) {
  696. // Infix operators must satisfy the infix operator rules.
  697. if (!is_valid_as_infix) {
  698. emitter.EmitError<BinaryOperatorRequiresWhitespace>(
  699. *position,
  700. {.has_leading_space = tokens.HasLeadingWhitespace(*position),
  701. .has_trailing_space = tokens.HasTrailingWhitespace(*position)});
  702. }
  703. } else {
  704. bool prefix = fixity == OperatorFixity::Prefix;
  705. // Whitespace is not permitted between a symbolic pre/postfix operator and
  706. // its operand.
  707. if (NextTokenKind().IsSymbol() &&
  708. (prefix ? tokens.HasTrailingWhitespace(*position)
  709. : tokens.HasLeadingWhitespace(*position))) {
  710. emitter.EmitError<UnaryOperatorHasWhitespace>(*position,
  711. {.prefix = prefix});
  712. }
  713. // Pre/postfix operators must not satisfy the infix operator rules.
  714. if (is_valid_as_infix) {
  715. emitter.EmitError<UnaryOperatorRequiresWhitespace>(*position,
  716. {.prefix = prefix});
  717. }
  718. }
  719. }
  720. auto ParseTree::Parser::IsTrailingOperatorInfix() -> bool {
  721. if (AtEndOfFile()) {
  722. return false;
  723. }
  724. // An operator that follows the infix operator rules is parsed as
  725. // infix, unless the next token means that it can't possibly be.
  726. if (IsLexicallyValidInfixOperator() &&
  727. IsPossibleStartOfOperand(tokens.GetKind(*(position + 1)))) {
  728. return true;
  729. }
  730. // A trailing operator with leading whitespace that's not valid as infix is
  731. // not valid at all. If the next token looks like the start of an operand,
  732. // then parse as infix, otherwise as postfix. Either way we'll produce a
  733. // diagnostic later on.
  734. if (tokens.HasLeadingWhitespace(*position) &&
  735. IsAssumedStartOfOperand(tokens.GetKind(*(position + 1)))) {
  736. return true;
  737. }
  738. return false;
  739. }
  740. auto ParseTree::Parser::ParseOperatorExpression(
  741. PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
  742. auto start = GetSubtreeStartPosition();
  743. llvm::Optional<Node> lhs;
  744. PrecedenceGroup lhs_precedence = PrecedenceGroup::ForPostfixExpression();
  745. // Check for a prefix operator.
  746. if (auto operator_precedence = PrecedenceGroup::ForLeading(NextTokenKind());
  747. !operator_precedence) {
  748. lhs = ParsePostfixExpression();
  749. } else {
  750. if (PrecedenceGroup::GetPriority(ambient_precedence,
  751. *operator_precedence) !=
  752. OperatorPriority::RightFirst) {
  753. // The precedence rules don't permit this prefix operator in this
  754. // context. Diagnose this, but carry on and parse it anyway.
  755. emitter.EmitError<OperatorRequiresParentheses>(*position);
  756. } else {
  757. // Check that this operator follows the proper whitespace rules.
  758. DiagnoseOperatorFixity(OperatorFixity::Prefix);
  759. }
  760. auto operator_token = Consume(NextTokenKind());
  761. bool has_errors = !ParseOperatorExpression(*operator_precedence);
  762. lhs = AddNode(ParseNodeKind::PrefixOperator(), operator_token, start,
  763. has_errors);
  764. lhs_precedence = *operator_precedence;
  765. }
  766. // Consume a sequence of infix and postfix operators.
  767. while (auto trailing_operator = PrecedenceGroup::ForTrailing(
  768. NextTokenKind(), IsTrailingOperatorInfix())) {
  769. auto [operator_precedence, is_binary] = *trailing_operator;
  770. // FIXME: If this operator is ambiguous with either the ambient precedence
  771. // or the LHS precedence, and there's a variant with a different fixity
  772. // that would work, use that one instead for error recovery.
  773. if (PrecedenceGroup::GetPriority(ambient_precedence, operator_precedence) !=
  774. OperatorPriority::RightFirst) {
  775. // The precedence rules don't permit this operator in this context. Try
  776. // again in the enclosing expression context.
  777. return lhs;
  778. }
  779. if (PrecedenceGroup::GetPriority(lhs_precedence, operator_precedence) !=
  780. OperatorPriority::LeftFirst) {
  781. // Either the LHS operator and this operator are ambiguous, or the
  782. // LHS operaor is a unary operator that can't be nested within
  783. // this operator. Either way, parentheses are required.
  784. emitter.EmitError<OperatorRequiresParentheses>(*position);
  785. lhs = llvm::None;
  786. } else {
  787. DiagnoseOperatorFixity(is_binary ? OperatorFixity::Infix
  788. : OperatorFixity::Postfix);
  789. }
  790. auto operator_token = Consume(NextTokenKind());
  791. if (is_binary) {
  792. auto rhs = ParseOperatorExpression(operator_precedence);
  793. lhs = AddNode(ParseNodeKind::InfixOperator(), operator_token, start,
  794. /*has_error=*/!lhs || !rhs);
  795. } else {
  796. lhs = AddNode(ParseNodeKind::PostfixOperator(), operator_token, start,
  797. /*has_error=*/!lhs);
  798. }
  799. lhs_precedence = operator_precedence;
  800. }
  801. return lhs;
  802. }
  803. auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
  804. return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
  805. }
  806. auto ParseTree::Parser::ParseType() -> llvm::Optional<Node> {
  807. return ParseOperatorExpression(PrecedenceGroup::ForType());
  808. }
  809. auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
  810. TokenizedBuffer::Token start_token = *position;
  811. auto start = GetSubtreeStartPosition();
  812. bool has_errors = !ParseExpression();
  813. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  814. return AddNode(ParseNodeKind::ExpressionStatement(), *semi, start,
  815. has_errors);
  816. }
  817. if (!has_errors) {
  818. emitter.EmitError<ExpectedSemiAfterExpression>(*position);
  819. }
  820. if (auto recovery_node =
  821. SkipPastLikelyEnd(start_token, [&](TokenizedBuffer::Token semi) {
  822. return AddNode(ParseNodeKind::ExpressionStatement(), semi, start,
  823. true);
  824. })) {
  825. return recovery_node;
  826. }
  827. // Found junk not even followed by a `;`.
  828. return llvm::None;
  829. }
  830. auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
  831. -> llvm::Optional<Node> {
  832. // `(` expression `)`
  833. auto start = GetSubtreeStartPosition();
  834. auto open_paren = ConsumeIf(TokenKind::OpenParen());
  835. if (!open_paren) {
  836. emitter.EmitError<ExpectedParenAfter>(*position,
  837. {.introducer = introducer});
  838. }
  839. auto expr = ParseExpression();
  840. if (!open_paren) {
  841. // Don't expect a matching closing paren if there wasn't an opening paren.
  842. return llvm::None;
  843. }
  844. auto close_paren =
  845. ParseCloseParen(*open_paren, ParseNodeKind::ConditionEnd());
  846. return AddNode(ParseNodeKind::Condition(), *open_paren, start,
  847. /*has_error=*/!expr || !close_paren);
  848. }
  849. auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
  850. auto start = GetSubtreeStartPosition();
  851. auto if_token = Consume(TokenKind::IfKeyword());
  852. auto cond = ParseParenCondition(TokenKind::IfKeyword());
  853. auto then_case = ParseCodeBlock();
  854. bool else_has_errors = false;
  855. if (ConsumeAndAddLeafNodeIf(TokenKind::ElseKeyword(),
  856. ParseNodeKind::IfStatementElse())) {
  857. // 'else if' is permitted as a special case.
  858. if (NextTokenIs(TokenKind::IfKeyword()))
  859. else_has_errors = !ParseIfStatement();
  860. else
  861. else_has_errors = !ParseCodeBlock();
  862. }
  863. return AddNode(ParseNodeKind::IfStatement(), if_token, start,
  864. /*has_error=*/!cond || !then_case || else_has_errors);
  865. }
  866. auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
  867. auto start = GetSubtreeStartPosition();
  868. auto while_token = Consume(TokenKind::WhileKeyword());
  869. auto cond = ParseParenCondition(TokenKind::WhileKeyword());
  870. auto body = ParseCodeBlock();
  871. return AddNode(ParseNodeKind::WhileStatement(), while_token, start,
  872. /*has_error=*/!cond || !body);
  873. }
  874. auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
  875. KeywordStatementArgument argument)
  876. -> llvm::Optional<Node> {
  877. auto keyword_kind = NextTokenKind();
  878. assert(keyword_kind.IsKeyword());
  879. auto start = GetSubtreeStartPosition();
  880. auto keyword = Consume(keyword_kind);
  881. bool arg_error = false;
  882. if ((argument == KeywordStatementArgument::Optional &&
  883. NextTokenKind() != TokenKind::Semi()) ||
  884. argument == KeywordStatementArgument::Mandatory) {
  885. arg_error = !ParseExpression();
  886. }
  887. auto semi =
  888. ConsumeAndAddLeafNodeIf(TokenKind::Semi(), ParseNodeKind::StatementEnd());
  889. if (!semi) {
  890. emitter.EmitError<ExpectedSemiAfter>(*position,
  891. {.preceding = keyword_kind});
  892. // FIXME: Try to skip to a semicolon to recover.
  893. }
  894. return AddNode(kind, keyword, start, /*has_error=*/!semi || arg_error);
  895. }
  896. auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
  897. switch (NextTokenKind()) {
  898. case TokenKind::VarKeyword():
  899. return ParseVariableDeclaration();
  900. case TokenKind::IfKeyword():
  901. return ParseIfStatement();
  902. case TokenKind::WhileKeyword():
  903. return ParseWhileStatement();
  904. case TokenKind::ContinueKeyword():
  905. return ParseKeywordStatement(ParseNodeKind::ContinueStatement(),
  906. KeywordStatementArgument::None);
  907. case TokenKind::BreakKeyword():
  908. return ParseKeywordStatement(ParseNodeKind::BreakStatement(),
  909. KeywordStatementArgument::None);
  910. case TokenKind::ReturnKeyword():
  911. return ParseKeywordStatement(ParseNodeKind::ReturnStatement(),
  912. KeywordStatementArgument::Optional);
  913. default:
  914. // A statement with no introducer token can only be an expression
  915. // statement.
  916. return ParseExpressionStatement();
  917. }
  918. }
  919. } // namespace Carbon