parser2.cpp 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "toolchain/parser/parser2.h"
  5. #include <cstdlib>
  6. #include <memory>
  7. #include "common/check.h"
  8. #include "llvm/ADT/Optional.h"
  9. #include "toolchain/lexer/token_kind.h"
  10. #include "toolchain/lexer/tokenized_buffer.h"
  11. #include "toolchain/parser/parse_node_kind.h"
  12. #include "toolchain/parser/parse_tree.h"
  13. namespace Carbon {
  14. Parser2::Parser2(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
  15. TokenDiagnosticEmitter& emitter)
  16. : tree_(tree_arg),
  17. tokens_(tokens_arg),
  18. emitter_(emitter),
  19. position_(tokens_.tokens().begin()),
  20. end_(tokens_.tokens().end()) {
  21. CARBON_CHECK(position_ != end_) << "Empty TokenizedBuffer";
  22. --end_;
  23. CARBON_CHECK(tokens_.GetKind(*end_) == TokenKind::EndOfFile())
  24. << "TokenizedBuffer should end with EndOfFile, ended with "
  25. << tokens_.GetKind(*end_).Name();
  26. }
  27. auto Parser2::AddLeafNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  28. bool has_error) -> void {
  29. tree_.node_impls_.push_back(
  30. ParseTree::NodeImpl(kind, has_error, token, /*subtree_size=*/1));
  31. if (has_error) {
  32. tree_.has_errors_ = true;
  33. }
  34. }
  35. auto Parser2::AddNode(ParseNodeKind kind, TokenizedBuffer::Token token,
  36. int subtree_start, bool has_error) -> void {
  37. int subtree_size = tree_.size() - subtree_start + 1;
  38. tree_.node_impls_.push_back(
  39. ParseTree::NodeImpl(kind, has_error, token, subtree_size));
  40. if (has_error) {
  41. tree_.has_errors_ = true;
  42. }
  43. }
  44. auto Parser2::ConsumeAndAddLeafNodeIf(TokenKind token_kind,
  45. ParseNodeKind node_kind) -> bool {
  46. auto token = ConsumeIf(token_kind);
  47. if (!token) {
  48. return false;
  49. }
  50. AddLeafNode(node_kind, *token);
  51. return true;
  52. }
  53. auto Parser2::ConsumeIf(TokenKind kind)
  54. -> llvm::Optional<TokenizedBuffer::Token> {
  55. if (!PositionIs(kind)) {
  56. return llvm::None;
  57. }
  58. auto token = *position_;
  59. ++position_;
  60. return token;
  61. }
  62. auto Parser2::Parse() -> void {
  63. PushState(ParserState::Declaration());
  64. while (!state_stack_.empty()) {
  65. switch (state_stack_.back().state) {
  66. #define CARBON_PARSER_STATE(Name) \
  67. case ParserState::Name(): \
  68. Handle##Name##State(); \
  69. break;
  70. #include "toolchain/parser/parser_state.def"
  71. }
  72. }
  73. AddLeafNode(ParseNodeKind::FileEnd(), *position_);
  74. }
  75. auto Parser2::SkipMatchingGroup() -> bool {
  76. if (!PositionKind().IsOpeningSymbol()) {
  77. return false;
  78. }
  79. SkipTo(tokens_.GetMatchedClosingToken(*position_));
  80. ++position_;
  81. return true;
  82. }
  83. auto Parser2::SkipPastLikelyEnd(TokenizedBuffer::Token skip_root)
  84. -> llvm::Optional<TokenizedBuffer::Token> {
  85. CARBON_CHECK(position_ < end_);
  86. TokenizedBuffer::Line root_line = tokens_.GetLine(skip_root);
  87. int root_line_indent = tokens_.GetIndentColumnNumber(root_line);
  88. // We will keep scanning through tokens on the same line as the root or
  89. // lines with greater indentation than root's line.
  90. auto is_same_line_or_indent_greater_than_root =
  91. [&](TokenizedBuffer::Token t) {
  92. TokenizedBuffer::Line l = tokens_.GetLine(t);
  93. if (l == root_line) {
  94. return true;
  95. }
  96. return tokens_.GetIndentColumnNumber(l) > root_line_indent;
  97. };
  98. do {
  99. if (PositionIs(TokenKind::CloseCurlyBrace())) {
  100. // Immediately bail out if we hit an unmatched close curly, this will
  101. // pop us up a level of the syntax grouping.
  102. return llvm::None;
  103. }
  104. // We assume that a semicolon is always intended to be the end of the
  105. // current construct.
  106. if (auto semi = ConsumeIf(TokenKind::Semi())) {
  107. return semi;
  108. }
  109. // Skip over any matching group of tokens_.
  110. if (SkipMatchingGroup()) {
  111. continue;
  112. }
  113. // Otherwise just step forward one token.
  114. ++position_;
  115. } while (position_ != end_ &&
  116. is_same_line_or_indent_greater_than_root(*position_));
  117. return llvm::None;
  118. }
  119. auto Parser2::SkipTo(TokenizedBuffer::Token t) -> void {
  120. CARBON_CHECK(t > *position_) << "Tried to skip backwards.";
  121. position_ = TokenizedBuffer::TokenIterator(t);
  122. CARBON_CHECK(position_ != end_) << "Skipped past EOF.";
  123. }
  124. auto Parser2::HandleDeclarationState() -> void {
  125. do {
  126. switch (auto token_kind = PositionKind()) {
  127. case TokenKind::EndOfFile(): {
  128. state_stack_.pop_back();
  129. return;
  130. }
  131. case TokenKind::Fn(): {
  132. PushState(ParserState::FunctionIntroducer());
  133. AddLeafNode(ParseNodeKind::FunctionIntroducer(), *position_);
  134. ++position_;
  135. return;
  136. }
  137. case TokenKind::Semi(): {
  138. AddLeafNode(ParseNodeKind::EmptyDeclaration(), *position_);
  139. ++position_;
  140. break;
  141. }
  142. default: {
  143. CARBON_DIAGNOSTIC(UnrecognizedDeclaration, Error,
  144. "Unrecognized declaration introducer.");
  145. emitter_.Emit(*position_, UnrecognizedDeclaration);
  146. tree_.has_errors_ = true;
  147. if (auto semi = SkipPastLikelyEnd(*position_)) {
  148. AddLeafNode(ParseNodeKind::EmptyDeclaration(), *semi,
  149. /*has_error=*/true);
  150. }
  151. break;
  152. }
  153. }
  154. } while (position_ < end_);
  155. }
  156. auto Parser2::HandleFunctionError(bool skip_past_likely_end) -> void {
  157. auto token = state_stack_.back().start_token;
  158. if (skip_past_likely_end && SkipPastLikelyEnd(token)) {
  159. token = *position_;
  160. }
  161. AddNode(ParseNodeKind::FunctionDeclaration(), token,
  162. state_stack_.back().subtree_start,
  163. /*has_error=*/true);
  164. state_stack_.pop_back();
  165. }
  166. auto Parser2::HandleFunctionIntroducerState() -> void {
  167. if (!ConsumeAndAddLeafNodeIf(TokenKind::Identifier(),
  168. ParseNodeKind::DeclaredName())) {
  169. CARBON_DIAGNOSTIC(ExpectedFunctionName, Error,
  170. "Expected function name after `fn` keyword.");
  171. emitter_.Emit(*position_, ExpectedFunctionName);
  172. // TODO: We could change the lexer to allow us to synthesize certain
  173. // kinds of tokens and try to "recover" here, but unclear that this is
  174. // really useful.
  175. HandleFunctionError(true);
  176. return;
  177. }
  178. if (!PositionIs(TokenKind::OpenParen())) {
  179. CARBON_DIAGNOSTIC(ExpectedFunctionParams, Error,
  180. "Expected `(` after function name.");
  181. emitter_.Emit(*position_, ExpectedFunctionParams);
  182. HandleFunctionError(true);
  183. return;
  184. }
  185. // Parse the parameter list as its own subtree; once that pops, resume
  186. // function parsing.
  187. state_stack_.back().state = ParserState::FunctionParameterListDone();
  188. PushState(ParserState::FunctionParameterList());
  189. // Advance past the open parenthesis before continuing.
  190. // TODO: When swapping () start/end, this should AddNode the open before
  191. // continuing.
  192. ++position_;
  193. }
  194. auto Parser2::HandleFunctionParameterListState() -> void {
  195. // TODO: Handle non-empty lists.
  196. if (!PositionIs(TokenKind::CloseParen())) {
  197. CARBON_DIAGNOSTIC(ExpectedFunctionParams, Error,
  198. "Expected `(` after function name.");
  199. emitter_.Emit(*position_, ExpectedFunctionParams);
  200. SkipTo(tokens_.GetMatchedClosingToken(state_stack_.back().start_token));
  201. AddLeafNode(ParseNodeKind::ParameterListEnd(), *position_,
  202. /*has_error=*/true);
  203. AddNode(ParseNodeKind::ParameterList(), state_stack_.back().start_token,
  204. state_stack_.back().subtree_start);
  205. ++position_;
  206. return;
  207. }
  208. AddLeafNode(ParseNodeKind::ParameterListEnd(), *position_);
  209. AddNode(ParseNodeKind::ParameterList(), state_stack_.back().start_token,
  210. state_stack_.back().subtree_start);
  211. ++position_;
  212. state_stack_.pop_back();
  213. }
  214. auto Parser2::HandleFunctionParameterListDoneState() -> void {
  215. switch (auto token_kind = PositionKind()) {
  216. case TokenKind::Semi(): {
  217. AddNode(ParseNodeKind::FunctionDeclaration(), *position_,
  218. state_stack_.back().subtree_start);
  219. ++position_;
  220. state_stack_.pop_back();
  221. break;
  222. }
  223. // TODO: OpenCurlyBrace is a definition.
  224. case TokenKind::OpenCurlyBrace(): {
  225. CARBON_DIAGNOSTIC(
  226. ExpectedFunctionBodyOrSemi, Error,
  227. "Expected function definition or `;` after function declaration.");
  228. emitter_.Emit(*position_, ExpectedFunctionBodyOrSemi);
  229. HandleFunctionError(true);
  230. break;
  231. }
  232. default: {
  233. CARBON_DIAGNOSTIC(
  234. ExpectedFunctionBodyOrSemi, Error,
  235. "Expected function definition or `;` after function declaration.");
  236. emitter_.Emit(*position_, ExpectedFunctionBodyOrSemi);
  237. // Only need to skip if we've not already found a new line.
  238. HandleFunctionError(tokens_.GetLine(*position_) ==
  239. tokens_.GetLine(state_stack_.back().start_token));
  240. break;
  241. }
  242. }
  243. }
  244. } // namespace Carbon