token_kind.h 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #ifndef CARBON_TOOLCHAIN_LEX_TOKEN_KIND_H_
  5. #define CARBON_TOOLCHAIN_LEX_TOKEN_KIND_H_
  6. #include <cstdint>
  7. #include "common/check.h"
  8. #include "common/enum_base.h"
  9. #include "llvm/ADT/ArrayRef.h"
  10. #include "llvm/ADT/StringRef.h"
  11. #include "llvm/Support/FormatVariadicDetails.h"
  12. namespace Carbon::Lex {
  13. CARBON_DEFINE_RAW_ENUM_CLASS(TokenKind, uint8_t) {
  14. #define CARBON_TOKEN(TokenName) CARBON_RAW_ENUM_ENUMERATOR(TokenName)
  15. #include "toolchain/lex/token_kind.def"
  16. };
  17. class TokenKind : public CARBON_ENUM_BASE(TokenKind) {
  18. public:
  19. #define CARBON_TOKEN(TokenName) CARBON_ENUM_CONSTANT_DECL(TokenName)
  20. #include "toolchain/lex/token_kind.def"
  21. // An array of all the keyword tokens.
  22. static const llvm::ArrayRef<TokenKind> KeywordTokens;
  23. using EnumBase::EnumBase;
  24. // Permit creation from RawEnumType for templates.
  25. using EnumBase::Make;
  26. // Permit conversion to integer for use as an array index.
  27. using EnumBase::AsInt;
  28. // Test whether this kind of token is a simple symbol sequence (punctuation,
  29. // not letters) that appears directly in the source text and can be
  30. // unambiguously lexed with `starts_with` logic. While these may appear
  31. // inside of other tokens, outside of the contents of other tokens they
  32. // don't require any specific characters before or after to distinguish them
  33. // in the source. Returns false otherwise.
  34. auto is_symbol() const -> bool { return IsSymbol[AsInt()]; }
  35. // Test whether this kind of token is a grouping symbol (part of an opening
  36. // and closing pair that must always be matched in the token stream).
  37. auto is_grouping_symbol() const -> bool { return IsGroupingSymbol[AsInt()]; }
  38. // Test whether this kind of token is an opening symbol for a group.
  39. auto is_opening_symbol() const -> bool { return IsOpeningSymbol[AsInt()]; }
  40. // Returns the associated closing symbol for an opening symbol.
  41. //
  42. // The token kind must be an opening symbol.
  43. auto closing_symbol() const -> TokenKind {
  44. auto result = ClosingSymbol[AsInt()];
  45. CARBON_DCHECK(result != Error, "Only opening symbols are valid!");
  46. return result;
  47. }
  48. // Test whether this kind of token is a closing symbol for a group.
  49. auto is_closing_symbol() const -> bool { return IsClosingSymbol[AsInt()]; }
  50. // Returns the associated opening symbol for a closing symbol.
  51. //
  52. // The token kind must be a closing symbol.
  53. auto opening_symbol() const -> TokenKind {
  54. auto result = OpeningSymbol[AsInt()];
  55. CARBON_DCHECK(result != Error, "Only closing symbols are valid!");
  56. return result;
  57. }
  58. // Test whether this kind of token is a one-character symbol whose character
  59. // is not part of any other symbol.
  60. auto is_one_char_symbol() const -> bool { return IsOneCharSymbol[AsInt()]; }
  61. // Test whether this kind of token is a keyword.
  62. auto is_keyword() const -> bool { return IsKeyword[AsInt()]; }
  63. // Test whether this kind of token is a sized type literal.
  64. auto is_sized_type_literal() const -> bool {
  65. return *this == TokenKind::IntTypeLiteral ||
  66. *this == TokenKind::UnsignedIntTypeLiteral ||
  67. *this == TokenKind::FloatTypeLiteral;
  68. }
  69. // If this token kind has a fixed spelling when in source code, returns it.
  70. // Otherwise returns an empty string.
  71. auto fixed_spelling() const -> llvm::StringLiteral {
  72. return FixedSpelling[AsInt()];
  73. }
  74. // Get the expected number of parse tree nodes that will be created for this
  75. // token.
  76. auto expected_max_parse_tree_size() const -> int {
  77. return ExpectedParseTreeSize[AsInt()];
  78. }
  79. // Test whether this token kind is in the provided list.
  80. auto IsOneOf(std::initializer_list<TokenKind> kinds) const -> bool {
  81. for (TokenKind kind : kinds) {
  82. if (*this == kind) {
  83. return true;
  84. }
  85. }
  86. return false;
  87. }
  88. private:
  89. static const TokenKind KeywordTokensStorage[];
  90. static const bool IsSymbol[];
  91. static const bool IsGroupingSymbol[];
  92. static const bool IsOpeningSymbol[];
  93. static const TokenKind ClosingSymbol[];
  94. static const bool IsClosingSymbol[];
  95. static const TokenKind OpeningSymbol[];
  96. static const bool IsOneCharSymbol[];
  97. static const bool IsKeyword[];
  98. static const llvm::StringLiteral FixedSpelling[];
  99. static const int8_t ExpectedParseTreeSize[];
  100. };
  101. #define CARBON_TOKEN(TokenName) \
  102. CARBON_ENUM_CONSTANT_DEFINITION(TokenKind, TokenName)
  103. #include "toolchain/lex/token_kind.def"
  104. inline constexpr TokenKind TokenKind::KeywordTokensStorage[] = {
  105. #define CARBON_KEYWORD_TOKEN(TokenName, Spelling) TokenKind::TokenName,
  106. #include "toolchain/lex/token_kind.def"
  107. };
  108. inline constexpr llvm::ArrayRef<TokenKind> TokenKind::KeywordTokens =
  109. KeywordTokensStorage;
  110. } // namespace Carbon::Lex
  111. // We use formatv primarily for diagnostics. In these cases, it's expected that
  112. // the spelling in source code should be used.
  113. template <>
  114. struct llvm::format_provider<Carbon::Lex::TokenKind> {
  115. static void format(const Carbon::Lex::TokenKind& kind, raw_ostream& out,
  116. StringRef /*style*/) {
  117. auto spelling = kind.fixed_spelling();
  118. if (!spelling.empty()) {
  119. out << spelling;
  120. } else {
  121. // Default to the name if there's no fixed spelling.
  122. out << kind;
  123. }
  124. }
  125. };
  126. #endif // CARBON_TOOLCHAIN_LEX_TOKEN_KIND_H_