Просмотр исходного кода

Modify parsing to monitor stack depth and error before overflow (#987)

Jon Meow 4 лет назад
Родитель
Сommit
29522e6ed8

+ 1 - 0
.codespell_ignore

@@ -2,6 +2,7 @@
 # Exceptions. See /LICENSE for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+atleast
 circularly
 copyable
 crate

BIN
toolchain/parser/fuzzer_corpus/811dedfa7c1108081bd7d76e57f2b15c1967985a


+ 5 - 0
toolchain/parser/parse_tree.h

@@ -46,6 +46,11 @@ class ParseTree {
   class PostorderIterator;
   class SiblingIterator;
 
+  // The maximum stack depth allowed while recursing the parse tree.
+  // This is meant to approximate system stack limits, but we may need to find a
+  // better way to track what the system is enforcing.
+  static constexpr int StackDepthLimit = 200;
+
   // Parses the token buffer into a `ParseTree`.
   //
   // This is the factory function which is used to build parse trees.

+ 22 - 0
toolchain/parser/parse_tree_test.cpp

@@ -11,6 +11,7 @@
 
 #include "common/ostream.h"
 #include "llvm/ADT/Sequence.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/SourceMgr.h"
 #include "toolchain/common/yaml_test_helpers.h"
 #include "toolchain/diagnostics/diagnostic_emitter.h"
@@ -22,6 +23,7 @@
 namespace Carbon::Testing {
 namespace {
 
+using ::testing::AtLeast;
 using ::testing::ElementsAre;
 using ::testing::Eq;
 using ::testing::Ne;
@@ -1170,5 +1172,25 @@ TEST_F(ParseTreeTest, ParenMatchRegression) {
                  MatchFileEnd()}));
 }
 
+TEST_F(ParseTreeTest, RecursionLimit) {
+  std::string code = "fn Foo() { return ";
+  code.append(10000, '(');
+  code.append(10000, ')');
+  code += "; }";
+  TokenizedBuffer tokens = GetTokenizedBuffer(code);
+  ASSERT_FALSE(tokens.HasErrors());
+  Testing::MockDiagnosticConsumer consumer;
+  // Recursion might be exceeded multiple times due to quirks in parse tree
+  // handling; we only need to be sure it's hit at least once for test
+  // correctness.
+  EXPECT_CALL(
+      consumer,
+      HandleDiagnostic(DiagnosticMessage(llvm::formatv(
+          "Exceeded recursion limit ({0})", ParseTree::StackDepthLimit))))
+      .Times(AtLeast(1));
+  ParseTree tree = ParseTree::Parse(tokens, consumer);
+  EXPECT_TRUE(tree.HasErrors());
+}
+
 }  // namespace
 }  // namespace Carbon::Testing

+ 61 - 0
toolchain/parser/parser_impl.cpp

@@ -17,6 +17,44 @@
 
 namespace Carbon {
 
+struct StackLimitExceeded {
+  static constexpr llvm::StringLiteral ShortName = "syntax-error";
+
+  auto Format() -> std::string {
+    return llvm::formatv("Exceeded recursion limit ({0})",
+                         ParseTree::StackDepthLimit);
+  }
+};
+
+// Manages the parser's stack depth, particularly decrementing on destruction.
+// This should only be instantiated through RETURN_IF_STACK_LIMITED.
+class ParseTree::Parser::ScopedStackStep {
+ public:
+  explicit ScopedStackStep(ParseTree::Parser* parser) : parser_(parser) {
+    ++parser_->stack_depth_;
+  }
+  ~ScopedStackStep() { --parser_->stack_depth_; }
+
+  auto VerifyUnderLimit() -> bool {
+    if (parser_->stack_depth_ >= StackDepthLimit) {
+      parser_->emitter_.EmitError<StackLimitExceeded>(*parser_->position_);
+      return false;
+    }
+    return true;
+  }
+
+ private:
+  ParseTree::Parser* parser_;
+};
+
+// Encapsulates checking the stack and erroring if needed. This should be called
+// at the start of every parse function.
+#define RETURN_IF_STACK_LIMITED(error_return_expr) \
+  ScopedStackStep scoped_stack_step(this);         \
+  if (!scoped_stack_step.VerifyUnderLimit()) {     \
+    return (error_return_expr);                    \
+  }
+
 struct UnexpectedTokenInCodeBlock
     : SimpleDiagnostic<UnexpectedTokenInCodeBlock> {
   static constexpr llvm::StringLiteral ShortName = "syntax-error";
@@ -473,6 +511,7 @@ auto ParseTree::Parser::ParseList(TokenKind open, TokenKind close,
 }
 
 auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   if (NextTokenIs(TokenKind::Identifier()) &&
       tokens_.GetKind(*(position_ + 1)) == TokenKind::Colon()) {
     // identifier `:` type
@@ -499,10 +538,12 @@ auto ParseTree::Parser::ParsePattern(PatternKind kind) -> llvm::Optional<Node> {
 }
 
 auto ParseTree::Parser::ParseFunctionParameter() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   return ParsePattern(PatternKind::Parameter);
 }
 
 auto ParseTree::Parser::ParseFunctionSignature() -> bool {
+  RETURN_IF_STACK_LIMITED(false);
   auto start = GetSubtreeStartPosition();
 
   auto params = ParseParenList(
@@ -529,6 +570,7 @@ auto ParseTree::Parser::ParseFunctionSignature() -> bool {
 }
 
 auto ParseTree::Parser::ParseCodeBlock() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   llvm::Optional<TokenizedBuffer::Token> maybe_open_curly =
       ConsumeIf(TokenKind::OpenCurlyBrace());
   if (!maybe_open_curly) {
@@ -571,6 +613,7 @@ auto ParseTree::Parser::ParseFunctionDeclaration() -> Node {
     return AddNode(ParseNodeKind::FunctionDeclaration(), function_intro_token,
                    start, /*has_error=*/true);
   };
+  RETURN_IF_STACK_LIMITED(add_error_function_node());
 
   auto handle_semi_in_error_recovery = [&](TokenizedBuffer::Token semi) {
     return AddLeafNode(ParseNodeKind::DeclarationEnd(), semi);
@@ -628,6 +671,10 @@ auto ParseTree::Parser::ParseVariableDeclaration() -> Node {
   TokenizedBuffer::Token var_token = Consume(TokenKind::VarKeyword());
   auto start = GetSubtreeStartPosition();
 
+  RETURN_IF_STACK_LIMITED(AddNode(ParseNodeKind::VariableDeclaration(),
+                                  var_token, start,
+                                  /*has_error=*/true));
+
   auto pattern = ParsePattern(PatternKind::Variable);
   if (!pattern) {
     if (auto after_pattern =
@@ -662,6 +709,7 @@ auto ParseTree::Parser::ParseEmptyDeclaration() -> Node {
 }
 
 auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   switch (NextTokenKind()) {
     case TokenKind::FnKeyword():
       return ParseFunctionDeclaration();
@@ -694,6 +742,7 @@ auto ParseTree::Parser::ParseDeclaration() -> llvm::Optional<Node> {
 }
 
 auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   // parenthesized-expression ::= `(` expression `)`
   // tuple-literal ::= `(` `)`
   //               ::= `(` expression `,` [expression-list [`,`]] `)`
@@ -716,6 +765,7 @@ auto ParseTree::Parser::ParseParenExpression() -> llvm::Optional<Node> {
 }
 
 auto ParseTree::Parser::ParseBraceExpression() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   // braced-expression ::= `{` [field-value-list] `}`
   //                   ::= `{` field-type-list `}`
   // field-value-list ::= field-value [`,`]
@@ -790,6 +840,7 @@ auto ParseTree::Parser::ParseBraceExpression() -> llvm::Optional<Node> {
 }
 
 auto ParseTree::Parser::ParsePrimaryExpression() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   llvm::Optional<ParseNodeKind> kind;
   switch (NextTokenKind()) {
     case TokenKind::Identifier():
@@ -847,6 +898,7 @@ auto ParseTree::Parser::ParseDesignatorExpression(SubtreeStart start,
 
 auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
     -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   // `(` expression-list[opt] `)`
   //
   // expression-list ::= expression
@@ -862,6 +914,7 @@ auto ParseTree::Parser::ParseCallExpression(SubtreeStart start, bool has_errors)
 }
 
 auto ParseTree::Parser::ParsePostfixExpression() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   auto start = GetSubtreeStartPosition();
   llvm::Optional<Node> expression = ParsePrimaryExpression();
 
@@ -993,6 +1046,7 @@ auto ParseTree::Parser::IsTrailingOperatorInfix() -> bool {
 
 auto ParseTree::Parser::ParseOperatorExpression(
     PrecedenceGroup ambient_precedence) -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   auto start = GetSubtreeStartPosition();
 
   llvm::Optional<Node> lhs;
@@ -1065,14 +1119,17 @@ auto ParseTree::Parser::ParseOperatorExpression(
 }
 
 auto ParseTree::Parser::ParseExpression() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   return ParseOperatorExpression(PrecedenceGroup::ForTopLevelExpression());
 }
 
 auto ParseTree::Parser::ParseType() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   return ParseOperatorExpression(PrecedenceGroup::ForType());
 }
 
 auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   TokenizedBuffer::Token start_token = *position_;
   auto start = GetSubtreeStartPosition();
 
@@ -1101,6 +1158,7 @@ auto ParseTree::Parser::ParseExpressionStatement() -> llvm::Optional<Node> {
 
 auto ParseTree::Parser::ParseParenCondition(TokenKind introducer)
     -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   // `(` expression `)`
   auto start = GetSubtreeStartPosition();
   auto open_paren = ConsumeIf(TokenKind::OpenParen());
@@ -1143,6 +1201,7 @@ auto ParseTree::Parser::ParseIfStatement() -> llvm::Optional<Node> {
 }
 
 auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   auto start = GetSubtreeStartPosition();
   auto while_token = Consume(TokenKind::WhileKeyword());
   auto cond = ParseParenCondition(TokenKind::WhileKeyword());
@@ -1154,6 +1213,7 @@ auto ParseTree::Parser::ParseWhileStatement() -> llvm::Optional<Node> {
 auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
                                               KeywordStatementArgument argument)
     -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   auto keyword_kind = NextTokenKind();
   assert(keyword_kind.IsKeyword());
 
@@ -1178,6 +1238,7 @@ auto ParseTree::Parser::ParseKeywordStatement(ParseNodeKind kind,
 }
 
 auto ParseTree::Parser::ParseStatement() -> llvm::Optional<Node> {
+  RETURN_IF_STACK_LIMITED(llvm::None);
   switch (NextTokenKind()) {
     case TokenKind::VarKeyword():
       return ParseVariableDeclaration();

+ 5 - 0
toolchain/parser/parser_impl.h

@@ -24,6 +24,7 @@ class ParseTree::Parser {
       -> ParseTree;
 
  private:
+  class ScopedStackStep;
   struct SubtreeStart;
 
   explicit Parser(ParseTree& tree_arg, TokenizedBuffer& tokens_arg,
@@ -277,6 +278,10 @@ class ParseTree::Parser {
   // The end position of the token buffer. There will always be an `EndOfFile`
   // token between `position` (inclusive) and `end` (exclusive).
   TokenizedBuffer::TokenIterator end_;
+
+  // Managed through RETURN_IF_STACK_LIMITED, which should be invoked by all
+  // functions.
+  int stack_depth_ = 0;
 };
 
 }  // namespace Carbon