Преглед изворни кода

Make `TokenKind` API inlinable. (#3142)

Previously, accessing any property of a token's kind required both
a function call to an out-of-line method and loading the relevant data
out of a table. These method calls were often roughly as expensive as
the load out of the table -- they would push and pop registers, and so
would touch a stack cache line not unlike the table cache line.

We can inline them in a way that carefully leaves the table definitions
in the single TU so we don't get tons of copies of data that have to be
merged by the linker. But the access to the table can be in an inline
function that allows the call overhead to evaporate and these to turn
into just table loads in the callers as well.

While in theory we could rely on forms of LTO to achieve this, it
doesn't seem worth relying on that. This is an easy win with very little
cost in reality.

Several of the lexer benchmarks improve by 1% or 2% from removing the
function call overhead. It's not a huge win, but it's nice.

Even nicer is that the profile is cleaned up significantly, clearly
focusing on the core functions where time is spent in lexing and
parsing.

Excitingly, this also pulls my big-picture benchmark of a 10mloc file down
to under 4.3s to lex and parse. Basically, we're above 2.3mloc/s
lexing and parsing. Not *quite* the 10mloc/s that I'm hoping for, but
still progress.

---------

Co-authored-by: Richard Smith <richard@metafoo.co.uk>
Chandler Carruth пре 2 година
родитељ
комит
cf42cc8498
2 измењених фајлова са 71 додато и 74 уклоњено
  1. 20 63
      toolchain/lexer/token_kind.cpp
  2. 51 11
      toolchain/lexer/token_kind.h

+ 20 - 63
toolchain/lexer/token_kind.cpp

@@ -4,9 +4,6 @@
 
 #include "toolchain/lexer/token_kind.h"
 
-#include "common/check.h"
-#include "llvm/ADT/StringRef.h"
-
 namespace Carbon {
 
 CARBON_DEFINE_ENUM_CLASS_NAMES(TokenKind) = {
@@ -14,112 +11,72 @@ CARBON_DEFINE_ENUM_CLASS_NAMES(TokenKind) = {
 #include "toolchain/lexer/token_kind.def"
 };
 
-auto TokenKind::is_symbol() const -> bool {
-  static constexpr bool Table[] = {
+constexpr bool TokenKind::IsSymbol[] = {
 #define CARBON_TOKEN(TokenName) false,
 #define CARBON_SYMBOL_TOKEN(TokenName, Spelling) true,
 #include "toolchain/lexer/token_kind.def"
-  };
-  return Table[AsInt()];
-}
+};
 
-auto TokenKind::is_grouping_symbol() const -> bool {
-  static constexpr bool Table[] = {
+constexpr bool TokenKind::IsGroupingSymbol[] = {
 #define CARBON_TOKEN(TokenName) false,
 #define CARBON_OPENING_GROUP_SYMBOL_TOKEN(TokenName, Spelling, ClosingName) \
   true,
 #define CARBON_CLOSING_GROUP_SYMBOL_TOKEN(TokenName, Spelling, OpeningName) \
   true,
 #include "toolchain/lexer/token_kind.def"
-  };
-  return Table[AsInt()];
-}
+};
 
-auto TokenKind::is_opening_symbol() const -> bool {
-  static constexpr bool Table[] = {
+constexpr bool TokenKind::IsOpeningSymbol[] = {
 #define CARBON_TOKEN(TokenName) false,
 #define CARBON_OPENING_GROUP_SYMBOL_TOKEN(TokenName, Spelling, ClosingName) \
   true,
 #include "toolchain/lexer/token_kind.def"
-  };
-  return Table[AsInt()];
-}
+};
 
-auto TokenKind::closing_symbol() const -> TokenKind {
-  static constexpr TokenKind Table[] = {
+constexpr TokenKind TokenKind::ClosingSymbol[] = {
 #define CARBON_TOKEN(TokenName) Error,
 #define CARBON_OPENING_GROUP_SYMBOL_TOKEN(TokenName, Spelling, ClosingName) \
   ClosingName,
 #include "toolchain/lexer/token_kind.def"
-  };
-  auto result = Table[AsInt()];
-  CARBON_CHECK(result != Error) << "Only opening symbols are valid!";
-  return result;
-}
+};
 
-auto TokenKind::is_closing_symbol() const -> bool {
-  static constexpr bool Table[] = {
+constexpr bool TokenKind::IsClosingSymbol[] = {
 #define CARBON_TOKEN(TokenName) false,
 #define CARBON_CLOSING_GROUP_SYMBOL_TOKEN(TokenName, Spelling, OpeningName) \
   true,
 #include "toolchain/lexer/token_kind.def"
-  };
-  return Table[AsInt()];
-}
+};
 
-auto TokenKind::opening_symbol() const -> TokenKind {
-  static constexpr TokenKind Table[] = {
+constexpr TokenKind TokenKind::OpeningSymbol[] = {
 #define CARBON_TOKEN(TokenName) Error,
 #define CARBON_CLOSING_GROUP_SYMBOL_TOKEN(TokenName, Spelling, OpeningName) \
   OpeningName,
 #include "toolchain/lexer/token_kind.def"
-  };
-  auto result = Table[AsInt()];
-  CARBON_CHECK(result != Error) << "Only closing symbols are valid!";
-  return result;
-}
+};
 
-auto TokenKind::is_one_char_symbol() const -> bool {
-  static constexpr bool Table[] = {
+constexpr bool TokenKind::IsOneCharSymbol[] = {
 #define CARBON_TOKEN(TokenName) false,
 #define CARBON_ONE_CHAR_SYMBOL_TOKEN(TokenName, Spelling) true,
 #include "toolchain/lexer/token_kind.def"
-  };
-  return Table[AsInt()];
-}
+};
 
-auto TokenKind::is_keyword() const -> bool {
-  static constexpr bool Table[] = {
+constexpr bool TokenKind::IsKeyword[] = {
 #define CARBON_TOKEN(TokenName) false,
 #define CARBON_KEYWORD_TOKEN(TokenName, Spelling) true,
 #include "toolchain/lexer/token_kind.def"
-  };
-  return Table[AsInt()];
-}
-
-auto TokenKind::is_sized_type_literal() const -> bool {
-  return *this == TokenKind::IntegerTypeLiteral ||
-         *this == TokenKind::UnsignedIntegerTypeLiteral ||
-         *this == TokenKind::FloatingPointTypeLiteral;
-}
+};
 
-auto TokenKind::fixed_spelling() const -> llvm::StringRef {
-  static constexpr llvm::StringLiteral Table[] = {
+constexpr llvm::StringLiteral TokenKind::FixedSpelling[] = {
 #define CARBON_TOKEN(TokenName) "",
 #define CARBON_SYMBOL_TOKEN(TokenName, Spelling) Spelling,
 #define CARBON_KEYWORD_TOKEN(TokenName, Spelling) Spelling,
 #include "toolchain/lexer/token_kind.def"
-  };
-  return Table[AsInt()];
-}
+};
 
-auto TokenKind::expected_parse_tree_size() const -> int {
-  static constexpr int8_t Table[] = {
+constexpr int8_t TokenKind::ExpectedParseTreeSize[] = {
 #define CARBON_TOKEN(Name) 1,
 #define CARBON_TOKEN_WITH_VIRTUAL_NODE(size) 2,
 #include "toolchain/lexer/token_kind.def"
-  };
-  return Table[AsInt()];
-}
+};
 
 }  // namespace Carbon

+ 51 - 11
toolchain/lexer/token_kind.h

@@ -7,8 +7,10 @@
 
 #include <cstdint>
 
+#include "common/check.h"
 #include "common/enum_base.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/FormatVariadicDetails.h"
 
 namespace Carbon {
@@ -32,45 +34,69 @@ class TokenKind : public CARBON_ENUM_BASE(TokenKind) {
   // inside of other tokens, outside of the contents of other tokens they
   // don't require any specific characters before or after to distinguish them
   // in the source. Returns false otherwise.
-  [[nodiscard]] auto is_symbol() const -> bool;
+  [[nodiscard]] auto is_symbol() const -> bool { return IsSymbol[AsInt()]; }
 
   // Test whether this kind of token is a grouping symbol (part of an opening
   // and closing pair that must always be matched in the token stream).
-  [[nodiscard]] auto is_grouping_symbol() const -> bool;
+  [[nodiscard]] auto is_grouping_symbol() const -> bool {
+    return IsGroupingSymbol[AsInt()];
+  }
 
   // Test whether this kind of token is an opening symbol for a group.
-  [[nodiscard]] auto is_opening_symbol() const -> bool;
+  [[nodiscard]] auto is_opening_symbol() const -> bool {
+    return IsOpeningSymbol[AsInt()];
+  }
 
   // Returns the associated closing symbol for an opening symbol.
   //
   // The token kind must be an opening symbol.
-  [[nodiscard]] auto closing_symbol() const -> TokenKind;
+  [[nodiscard]] auto closing_symbol() const -> TokenKind {
+    auto result = ClosingSymbol[AsInt()];
+    CARBON_CHECK(result != Error) << "Only opening symbols are valid!";
+    return result;
+  }
 
   // Test whether this kind of token is a closing symbol for a group.
-  [[nodiscard]] auto is_closing_symbol() const -> bool;
+  [[nodiscard]] auto is_closing_symbol() const -> bool {
+    return IsClosingSymbol[AsInt()];
+  }
 
   // Returns the associated opening symbol for a closing symbol.
   //
   // The token kind must be a closing symbol.
-  [[nodiscard]] auto opening_symbol() const -> TokenKind;
+  [[nodiscard]] auto opening_symbol() const -> TokenKind {
+    auto result = OpeningSymbol[AsInt()];
+    CARBON_CHECK(result != Error) << "Only closing symbols are valid!";
+    return result;
+  }
 
   // Test whether this kind of token is a one-character symbol whose character
   // is not part of any other symbol.
-  [[nodiscard]] auto is_one_char_symbol() const -> bool;
+  [[nodiscard]] auto is_one_char_symbol() const -> bool {
+    return IsOneCharSymbol[AsInt()];
+  };
 
   // Test whether this kind of token is a keyword.
-  [[nodiscard]] auto is_keyword() const -> bool;
+  [[nodiscard]] auto is_keyword() const -> bool { return IsKeyword[AsInt()]; };
 
   // Test whether this kind of token is a sized type literal.
-  [[nodiscard]] auto is_sized_type_literal() const -> bool;
+  [[nodiscard]] auto is_sized_type_literal() const -> bool {
+    return *this == TokenKind::IntegerTypeLiteral ||
+           *this == TokenKind::UnsignedIntegerTypeLiteral ||
+           *this == TokenKind::FloatingPointTypeLiteral;
+  };
 
   // If this token kind has a fixed spelling when in source code, returns it.
   // Otherwise returns an empty string.
-  [[nodiscard]] auto fixed_spelling() const -> llvm::StringRef;
+  [[nodiscard]] auto fixed_spelling() const -> llvm::StringRef {
+    return FixedSpelling[AsInt()];
+  };
 
   // Get the expected number of parse tree nodes that will be created for this
   // token.
-  [[nodiscard]] auto expected_parse_tree_size() const -> int;
+  [[nodiscard]] auto expected_parse_tree_size() const -> int {
+    return ExpectedParseTreeSize[AsInt()];
+  }
 
   // Test whether this token kind is in the provided list.
   [[nodiscard]] auto IsOneOf(std::initializer_list<TokenKind> kinds) const
@@ -85,6 +111,20 @@ class TokenKind : public CARBON_ENUM_BASE(TokenKind) {
 
  private:
   static const TokenKind KeywordTokensStorage[];
+
+  static const bool IsSymbol[];
+  static const bool IsGroupingSymbol[];
+  static const bool IsOpeningSymbol[];
+  static const TokenKind ClosingSymbol[];
+  static const bool IsClosingSymbol[];
+  static const TokenKind OpeningSymbol[];
+  static const bool IsOneCharSymbol[];
+
+  static const bool IsKeyword[];
+
+  static const llvm::StringLiteral FixedSpelling[];
+
+  static const int8_t ExpectedParseTreeSize[];
 };
 
 #define CARBON_TOKEN(TokenName) \