token_kind.h 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_LEXER_TOKEN_KIND_H_
  5. #define CARBON_TOOLCHAIN_LEXER_TOKEN_KIND_H_
  6. #include <cstdint>
  7. #include "common/check.h"
  8. #include "common/enum_base.h"
  9. #include "llvm/ADT/ArrayRef.h"
  10. #include "llvm/ADT/StringRef.h"
  11. #include "llvm/Support/FormatVariadicDetails.h"
  12. namespace Carbon {
  13. CARBON_DEFINE_RAW_ENUM_CLASS(TokenKind, uint8_t) {
  14. #define CARBON_TOKEN(TokenName) CARBON_RAW_ENUM_ENUMERATOR(TokenName)
  15. #include "toolchain/lexer/token_kind.def"
  16. };
  17. class TokenKind : public CARBON_ENUM_BASE(TokenKind) {
  18. public:
  19. #define CARBON_TOKEN(TokenName) CARBON_ENUM_CONSTANT_DECLARATION(TokenName)
  20. #include "toolchain/lexer/token_kind.def"
  21. // An array of all the keyword tokens.
  22. static const llvm::ArrayRef<TokenKind> KeywordTokens;
  23. // Test whether this kind of token is a simple symbol sequence (punctuation,
  24. // not letters) that appears directly in the source text and can be
  25. // unambiguously lexed with `starts_with` logic. While these may appear
  26. // inside of other tokens, outside of the contents of other tokens they
  27. // don't require any specific characters before or after to distinguish them
  28. // in the source. Returns false otherwise.
  29. [[nodiscard]] auto is_symbol() const -> bool { return IsSymbol[AsInt()]; }
  30. // Test whether this kind of token is a grouping symbol (part of an opening
  31. // and closing pair that must always be matched in the token stream).
  32. [[nodiscard]] auto is_grouping_symbol() const -> bool {
  33. return IsGroupingSymbol[AsInt()];
  34. }
  35. // Test whether this kind of token is an opening symbol for a group.
  36. [[nodiscard]] auto is_opening_symbol() const -> bool {
  37. return IsOpeningSymbol[AsInt()];
  38. }
  39. // Returns the associated closing symbol for an opening symbol.
  40. //
  41. // The token kind must be an opening symbol.
  42. [[nodiscard]] auto closing_symbol() const -> TokenKind {
  43. auto result = ClosingSymbol[AsInt()];
  44. CARBON_CHECK(result != Error) << "Only opening symbols are valid!";
  45. return result;
  46. }
  47. // Test whether this kind of token is a closing symbol for a group.
  48. [[nodiscard]] auto is_closing_symbol() const -> bool {
  49. return IsClosingSymbol[AsInt()];
  50. }
  51. // Returns the associated opening symbol for a closing symbol.
  52. //
  53. // The token kind must be a closing symbol.
  54. [[nodiscard]] auto opening_symbol() const -> TokenKind {
  55. auto result = OpeningSymbol[AsInt()];
  56. CARBON_CHECK(result != Error) << "Only closing symbols are valid!";
  57. return result;
  58. }
  59. // Test whether this kind of token is a one-character symbol whose character
  60. // is not part of any other symbol.
  61. [[nodiscard]] auto is_one_char_symbol() const -> bool {
  62. return IsOneCharSymbol[AsInt()];
  63. };
  64. // Test whether this kind of token is a keyword.
  65. [[nodiscard]] auto is_keyword() const -> bool { return IsKeyword[AsInt()]; };
  66. // Test whether this kind of token is a sized type literal.
  67. [[nodiscard]] auto is_sized_type_literal() const -> bool {
  68. return *this == TokenKind::IntegerTypeLiteral ||
  69. *this == TokenKind::UnsignedIntegerTypeLiteral ||
  70. *this == TokenKind::FloatingPointTypeLiteral;
  71. };
  72. // If this token kind has a fixed spelling when in source code, returns it.
  73. // Otherwise returns an empty string.
  74. [[nodiscard]] auto fixed_spelling() const -> llvm::StringRef {
  75. return FixedSpelling[AsInt()];
  76. };
  77. // Get the expected number of parse tree nodes that will be created for this
  78. // token.
  79. [[nodiscard]] auto expected_parse_tree_size() const -> int {
  80. return ExpectedParseTreeSize[AsInt()];
  81. }
  82. // Test whether this token kind is in the provided list.
  83. [[nodiscard]] auto IsOneOf(std::initializer_list<TokenKind> kinds) const
  84. -> bool {
  85. for (TokenKind kind : kinds) {
  86. if (*this == kind) {
  87. return true;
  88. }
  89. }
  90. return false;
  91. }
  92. private:
  93. static const TokenKind KeywordTokensStorage[];
  94. static const bool IsSymbol[];
  95. static const bool IsGroupingSymbol[];
  96. static const bool IsOpeningSymbol[];
  97. static const TokenKind ClosingSymbol[];
  98. static const bool IsClosingSymbol[];
  99. static const TokenKind OpeningSymbol[];
  100. static const bool IsOneCharSymbol[];
  101. static const bool IsKeyword[];
  102. static const llvm::StringLiteral FixedSpelling[];
  103. static const int8_t ExpectedParseTreeSize[];
  104. };
  105. #define CARBON_TOKEN(TokenName) \
  106. CARBON_ENUM_CONSTANT_DEFINITION(TokenKind, TokenName)
  107. #include "toolchain/lexer/token_kind.def"
  108. constexpr TokenKind TokenKind::KeywordTokensStorage[] = {
  109. #define CARBON_KEYWORD_TOKEN(TokenName, Spelling) TokenKind::TokenName,
  110. #include "toolchain/lexer/token_kind.def"
  111. };
  112. constexpr llvm::ArrayRef<TokenKind> TokenKind::KeywordTokens =
  113. KeywordTokensStorage;
  114. } // namespace Carbon
  115. namespace llvm {
  116. // We use formatv primarily for diagnostics. In these cases, it's expected that
  117. // the spelling in source code should be used.
  118. template <>
  119. struct format_provider<Carbon::TokenKind> {
  120. static void format(const Carbon::TokenKind& kind, raw_ostream& out,
  121. StringRef /*style*/) {
  122. auto spelling = kind.fixed_spelling();
  123. if (!spelling.empty()) {
  124. out << spelling;
  125. } else {
  126. // Default to the name if there's no fixed spelling.
  127. out << kind;
  128. }
  129. }
  130. };
  131. } // namespace llvm
  132. #endif // CARBON_TOOLCHAIN_LEXER_TOKEN_KIND_H_